library(tidyverse)
## ── Attaching packages ─────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0     ✔ purrr   0.2.4
## ✔ tibble  1.4.2     ✔ dplyr   0.7.4
## ✔ tidyr   0.8.0     ✔ stringr 1.3.0
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
#setwd("~/Google Drive/_OneDrive_Atimi_Software/Upgrad/_Upgrad/Capstone_project/")

General Information

# Load the hospital general information file. "Not Available" and empty
# strings are read as NA, and character columns are converted to factors.
# TRUE is spelled out because the `T` shorthand is an ordinary variable that
# can be reassigned.
general_info <- read.csv(
  file = "ValidFiles/Hospital General Information.csv",
  header = TRUE,
  check.names = TRUE,
  na.strings = c("Not Available", ""),
  stringsAsFactors = TRUE
)
# Inspect the column types and the first few values of every column.
str(general_info)
## 'data.frame':    4818 obs. of  28 variables:
##  $ Provider.ID                                                  : int  10001 10005 10006 10007 10008 10011 10012 10016 10018 10019 ...
##  $ Hospital.Name                                                : Factor w/ 4617 levels "ABBEVILLE AREA MEDICAL CENTER",..: 3685 2227 1107 2584 913 3952 983 3592 506 1568 ...
##  $ Address                                                      : Factor w/ 4789 levels " CALLE CONCEPCION VERA AYALA #550 S",..: 468 2113 1727 3917 239 3219 1589 138 1359 810 ...
##  $ City                                                         : Factor w/ 2949 levels "ABBEVILLE","ABERDEEN",..: 699 263 875 1931 1527 236 903 20 236 2413 ...
##  $ State                                                        : Factor w/ 56 levels "AK","AL","AR",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ ZIP.Code                                                     : int  36301 35957 35631 36467 36049 35235 35968 35007 35233 35660 ...
##  $ County.Name                                                  : Factor w/ 1565 levels "ABBEVILLE","ACADIA",..: 668 879 803 350 360 711 394 1300 711 317 ...
##  $ Phone.Number                                                 : num  3.35e+09 2.57e+09 2.57e+09 3.34e+09 3.34e+09 ...
##  $ Hospital.Type                                                : Factor w/ 3 levels "Acute Care Hospitals",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Hospital.Ownership                                           : Factor w/ 10 levels "Government - Federal",..: 2 2 2 10 6 10 6 10 10 2 ...
##  $ Emergency.Services                                           : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Meets.criteria.for.meaningful.use.of.EHRs                    : Factor w/ 1 level "Y": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Hospital.overall.rating                                      : int  3 3 2 3 3 2 3 3 NA 2 ...
##  $ Hospital.overall.rating.footnote                             : Factor w/ 5 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA NA NA NA NA 4 NA ...
##  $ Mortality.national.comparison                                : Factor w/ 3 levels "Above the National average",..: 3 2 2 3 3 3 2 3 NA 2 ...
##  $ Mortality.national.comparison.footnote                       : Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA NA NA NA NA 3 NA ...
##  $ Safety.of.care.national.comparison                           : Factor w/ 3 levels "Above the National average",..: 1 3 3 3 NA 2 3 1 NA 2 ...
##  $ Safety.of.care.national.comparison.footnote                  : Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA 3 NA NA NA 3 NA ...
##  $ Readmission.national.comparison                              : Factor w/ 3 levels "Above the National average",..: 3 1 3 2 3 3 3 1 NA 2 ...
##  $ Readmission.national.comparison.footnote                     : Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA NA NA NA NA 3 NA ...
##  $ Patient.experience.national.comparison                       : Factor w/ 3 levels "Above the National average",..: 2 3 2 3 NA 2 3 2 NA 2 ...
##  $ Patient.experience.national.comparison.footnote              : Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA 3 NA NA NA 3 NA ...
##  $ Effectiveness.of.care.national.comparison                    : Factor w/ 3 levels "Above the National average",..: 3 3 3 3 3 2 3 1 2 3 ...
##  $ Effectiveness.of.care.national.comparison.footnote           : Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA NA NA NA NA NA NA ...
##  $ Timeliness.of.care.national.comparison                       : Factor w/ 3 levels "Above the National average",..: 3 1 1 1 1 3 1 2 3 1 ...
##  $ Timeliness.of.care.national.comparison.footnote              : Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA NA NA NA NA NA NA ...
##  $ Efficient.use.of.medical.imaging.national.comparison         : Factor w/ 3 levels "Above the National average",..: 3 2 3 NA NA 3 3 2 NA 2 ...
##  $ Efficient.use.of.medical.imaging.national.comparison.footnote: Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA 3 3 NA NA NA 3 NA ...

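Removing the columns not needed for analysis. Note: the stated intent was also to filter to the acute care hospitals, but the code below only drops the `Hospital.Type` column; it does not filter any rows by hospital type.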

# Identifying / contact columns that are dropped before the analysis.
zdemographics <- c(
  "Hospital.Name", "Address", "City", "State",
  "County.Name", "Phone.Number", "ZIP.Code"
)
zdemogrphic_vars <- which(names(general_info) %in% zdemographics)

# Hospital descriptor columns that are dropped as well.
zvar1 <- c(
  "Hospital.Type", "Hospital.Ownership", "Emergency.Services",
  "Meets.criteria.for.meaningful.use.of.EHRs"
)
zvar2 <- which(names(general_info) %in% zvar1)

# Build the set of column positions to keep instead of negating the drop
# list: `df[, -integer(0)]` selects zero columns, so the negative-index form
# would silently empty the data frame if none of the names matched.
# `drop = FALSE` guarantees the result stays a data frame.
zkeep_vars <- setdiff(seq_along(general_info), c(zdemogrphic_vars, zvar2))
general_info_cleaned <- general_info[, zkeep_vars, drop = FALSE]

# Treat the overall rating as a categorical variable rather than an integer.
general_info_cleaned$Hospital.overall.rating <-
  as.factor(general_info_cleaned$Hospital.overall.rating)

Check whether the NAs in the ratings are missing at random by examining the rating footnote

# Count the rows carrying either of the two footnotes below, broken down by
# the overall rating and every national-comparison column.
general_info_cleaned %>%
  # Refer to the column through dplyr's data mask instead of
  # `general_info_cleaned$...`, so the condition is always evaluated on the
  # rows flowing through the pipe.
  filter(
    Hospital.overall.rating.footnote %in% c(
      "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs",
      "Data suppressed by CMS for one or more quarters"
    )
  ) %>%
  group_by(
    Hospital.overall.rating,
    Mortality.national.comparison,
    Readmission.national.comparison,
    Safety.of.care.national.comparison,
    Efficient.use.of.medical.imaging.national.comparison,
    Timeliness.of.care.national.comparison,
    Effectiveness.of.care.national.comparison,
    Patient.experience.national.comparison
  ) %>%
  summarise(count_rws = n()) %>%
  # Remove the grouping that summarise() leaves behind before reshaping.
  ungroup() %>%
  # Transpose so that each variable is printed on its own row.
  t()
##                                                      [,1] 
## Hospital.overall.rating                              NA   
## Mortality.national.comparison                        NA   
## Readmission.national.comparison                      NA   
## Safety.of.care.national.comparison                   NA   
## Efficient.use.of.medical.imaging.national.comparison NA   
## Timeliness.of.care.national.comparison               NA   
## Effectiveness.of.care.national.comparison            NA   
## Patient.experience.national.comparison               NA   
## count_rws                                            "212"
# Remove the rows whose rating footnote is one of the two values below.
# `%in%` never returns NA, so rows with a missing footnote are kept.
# The column is referenced through dplyr's data mask rather than
# `general_info_cleaned$...`, so the condition always matches the piped rows.
general_info_cleaned <- general_info_cleaned %>%
  filter(
    !(Hospital.overall.rating.footnote %in% c(
      "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs",
      "Data suppressed by CMS for one or more quarters"
    ))
  )

# Plot the share of hospitals at each overall rating level (NA ratings are
# excluded by table()'s default behaviour).
barplot(prop.table(table(general_info_cleaned$Hospital.overall.rating)))

# Summarise every column for the rows whose rating footnote is one of the two
# values below.
general_info_cleaned %>%
  # Use the data-masked column instead of `general_info_cleaned$...` so the
  # condition is evaluated on the rows flowing through the pipe.
  filter(
    Hospital.overall.rating.footnote %in% c(
      "Results are not available for this reporting period",
      "There are too few measures or measure groups reported to calculate a star rating or measure group score"
    )
  ) %>%
  summary()
##   Provider.ID     Hospital.overall.rating
##  Min.   : 10018   1   :  0               
##  1st Qu.:161340   2   :  0               
##  Median :261325   3   :  0               
##  Mean   :277742   4   :  0               
##  3rd Qu.:400104   5   :  0               
##  Max.   :670112   NA's:958               
##                                                                                                                             Hospital.overall.rating.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:  0            
##  Data suppressed by CMS for one or more quarters                                                                                            :  0            
##  Results are not available for this reporting period                                                                                        :  1            
##  There are too few measures or measure groups reported to calculate a star rating or measure group score                                    :957            
##  This hospital\x92s star rating only includes data reported on inpatient services                                                           :  0            
##                                                                                                                                                             
##               Mortality.national.comparison
##  Above the National average  :  0          
##  Below the National average  :  0          
##  Same as the National average:118          
##  NA's                        :840          
##                                            
##                                            
##                                                                                                                          Mortality.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:  0               
##  Data suppressed by CMS for one or more quarters                                                                                            :  0               
##  Results are not available for this reporting period                                                                                        :840               
##  NA's                                                                                                                                       :118               
##                                                                                                                                                                
##                                                                                                                                                                
##             Safety.of.care.national.comparison
##  Above the National average  :  1             
##  Below the National average  :  0             
##  Same as the National average: 20             
##  NA's                        :937             
##                                               
##                                               
##                                                                                                                       Safety.of.care.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:  0                 
##  Data suppressed by CMS for one or more quarters                                                                                            :  0                 
##  Results are not available for this reporting period                                                                                        :937                 
##  NA's                                                                                                                                       : 21                 
##                                                                                                                                                                  
##                                                                                                                                                                  
##              Readmission.national.comparison
##  Above the National average  :  0           
##  Below the National average  : 12           
##  Same as the National average:280           
##  NA's                        :666           
##                                             
##                                             
##                                                                                                                         Readmission.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:  0                
##  Data suppressed by CMS for one or more quarters                                                                                            :  0                
##  Results are not available for this reporting period                                                                                        :666                
##  NA's                                                                                                                                       :292                
##                                                                                                                                                                 
##                                                                                                                                                                 
##           Patient.experience.national.comparison
##  Above the National average  : 67               
##  Below the National average  :  3               
##  Same as the National average: 18               
##  NA's                        :870               
##                                                 
##                                                 
##                                                                                                                     Patient.experience.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:  0                   
##  Data suppressed by CMS for one or more quarters                                                                                            :  0                   
##  Results are not available for this reporting period                                                                                        :870                   
##  NA's                                                                                                                                       : 88                   
##                                                                                                                                                                    
##                                                                                                                                                                    
##         Effectiveness.of.care.national.comparison
##  Above the National average  :  0                
##  Below the National average  : 60                
##  Same as the National average:199                
##  NA's                        :699                
##                                                  
##                                                  
##                                                                                                                    Effectiveness.of.care.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:  0                     
##  Data suppressed by CMS for one or more quarters                                                                                            :  0                     
##  Results are not available for this reporting period                                                                                        :699                     
##  NA's                                                                                                                                       :259                     
##                                                                                                                                                                      
##                                                                                                                                                                      
##           Timeliness.of.care.national.comparison
##  Above the National average  : 89               
##  Below the National average  :  0               
##  Same as the National average:105               
##  NA's                        :764               
##                                                 
##                                                 
##                                                                                                                     Timeliness.of.care.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:  0                   
##  Data suppressed by CMS for one or more quarters                                                                                            :  0                   
##  Results are not available for this reporting period                                                                                        :764                   
##  NA's                                                                                                                                       :194                   
##                                                                                                                                                                    
##                                                                                                                                                                    
##    Efficient.use.of.medical.imaging.national.comparison
##  Above the National average  :  0                      
##  Below the National average  :  6                      
##  Same as the National average: 32                      
##  NA's                        :920                      
##                                                        
##                                                        
##                                                                                                              Efficient.use.of.medical.imaging.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:  0                          
##  Data suppressed by CMS for one or more quarters                                                                                            :  0                          
##  Results are not available for this reporting period                                                                                        :920                          
##  NA's                                                                                                                                       : 38                          
##                                                                                                                                                                           
## 
# Remove the rows whose rating footnote is one of the two values below.
# `%in%` never returns NA, so rows with a missing footnote are kept.
# The column is referenced through dplyr's data mask rather than
# `general_info_cleaned$...`, so the condition always matches the piped rows.
general_info_cleaned <- general_info_cleaned %>%
  filter(
    !(Hospital.overall.rating.footnote %in% c(
      "Results are not available for this reporting period",
      "There are too few measures or measure groups reported to calculate a star rating or measure group score"
    ))
  )

# Summarise every remaining column after the filter.
summary(general_info_cleaned)
##   Provider.ID     Hospital.overall.rating
##  Min.   : 10001   1: 117                 
##  1st Qu.:140014   2: 684                 
##  Median :260018   3:1772                 
##  Mean   :264497   4: 964                 
##  3rd Qu.:390113   5: 111                 
##  Max.   :670098                          
##                                                                                                                             Hospital.overall.rating.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:   0           
##  Data suppressed by CMS for one or more quarters                                                                                            :   0           
##  Results are not available for this reporting period                                                                                        :   0           
##  There are too few measures or measure groups reported to calculate a star rating or measure group score                                    :   0           
##  This hospital\x92s star rating only includes data reported on inpatient services                                                           : 172           
##  NA's                                                                                                                                       :3476           
##               Mortality.national.comparison
##  Above the National average  : 402         
##  Below the National average  : 343         
##  Same as the National average:2618         
##  NA's                        : 285         
##                                            
##                                            
##                                                                                                                          Mortality.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:   0              
##  Data suppressed by CMS for one or more quarters                                                                                            :   0              
##  Results are not available for this reporting period                                                                                        : 285              
##  NA's                                                                                                                                       :3363              
##                                                                                                                                                                
##                                                                                                                                                                
##             Safety.of.care.national.comparison
##  Above the National average  : 804            
##  Below the National average  : 673            
##  Same as the National average:1428            
##  NA's                        : 743            
##                                               
##                                               
##                                                                                                                       Safety.of.care.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:   0                
##  Data suppressed by CMS for one or more quarters                                                                                            :   0                
##  Results are not available for this reporting period                                                                                        : 743                
##  NA's                                                                                                                                       :2905                
##                                                                                                                                                                  
##                                                                                                                                                                  
##              Readmission.national.comparison
##  Above the National average  : 811          
##  Below the National average  : 860          
##  Same as the National average:1853          
##  NA's                        : 124          
##                                             
##                                             
##                                                                                                                         Readmission.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:   0               
##  Data suppressed by CMS for one or more quarters                                                                                            :   0               
##  Results are not available for this reporting period                                                                                        : 124               
##  NA's                                                                                                                                       :3524               
##                                                                                                                                                                 
##                                                                                                                                                                 
##           Patient.experience.national.comparison
##  Above the National average  :1135              
##  Below the National average  :1097              
##  Same as the National average:1143              
##  NA's                        : 273              
##                                                 
##                                                 
##                                                                                                                     Patient.experience.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:   0                  
##  Data suppressed by CMS for one or more quarters                                                                                            :   0                  
##  Results are not available for this reporting period                                                                                        : 273                  
##  NA's                                                                                                                                       :3375                  
##                                                                                                                                                                    
##                                                                                                                                                                    
##         Effectiveness.of.care.national.comparison
##  Above the National average  : 997               
##  Below the National average  : 449               
##  Same as the National average:1997               
##  NA's                        : 205               
##                                                  
##                                                  
##                                                                                                                    Effectiveness.of.care.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:   0                    
##  Data suppressed by CMS for one or more quarters                                                                                            :   0                    
##  Results are not available for this reporting period                                                                                        : 205                    
##  NA's                                                                                                                                       :3443                    
##                                                                                                                                                                      
##                                                                                                                                                                      
##           Timeliness.of.care.national.comparison
##  Above the National average  :1014              
##  Below the National average  : 903              
##  Same as the National average:1376              
##  NA's                        : 355              
##                                                 
##                                                 
##                                                                                                                     Timeliness.of.care.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:   0                  
##  Data suppressed by CMS for one or more quarters                                                                                            :   0                  
##  Results are not available for this reporting period                                                                                        : 355                  
##  NA's                                                                                                                                       :3293                  
##                                                                                                                                                                    
##                                                                                                                                                                    
##    Efficient.use.of.medical.imaging.national.comparison
##  Above the National average  : 359                     
##  Below the National average  : 368                     
##  Same as the National average:2027                     
##  NA's                        : 894                     
##                                                        
##                                                        
##                                                                                                              Efficient.use.of.medical.imaging.national.comparison.footnote
##  Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs:   0                         
##  Data suppressed by CMS for one or more quarters                                                                                            :   0                         
##  Results are not available for this reporting period                                                                                        : 894                         
##  NA's                                                                                                                                       :2754                         
##                                                                                                                                                                           
## 
  1. We have thus arrived at the “general_info_final” dataset, which holds the 1-5 hospital ratings with the NA ratings excluded
  2. Picking the hospital rating from the cleaned general info dataset
# Keep only the provider key and the overall rating, restricted to the rows
# whose rating footnote is NA.
z_rem_var1 <- which(
  is.element(names(general_info_cleaned), c("Provider.ID", "Hospital.overall.rating"))
)
general_info_final <- general_info_cleaned[
  is.na(general_info_cleaned[["Hospital.overall.rating.footnote"]]),
  z_rem_var1
]
summary(general_info_final)
##   Provider.ID     Hospital.overall.rating
##  Min.   : 10001   1: 115                 
##  1st Qu.:140053   2: 661                 
##  Median :260004   3:1668                 
##  Mean   :264810   4: 921                 
##  3rd Qu.:390100   5: 111                 
##  Max.   :670098
# Report the row and column counts of the final rating table.
dim(general_info_final)
## [1] 3476    2

Analysing the Complications dataset

# Load the hospital-level complications measures; "Not Available" and empty
# strings are read as NA, and strings become factors.
complications_df <- read.csv(
  file = "ValidFiles//Complications - Hospital.csv",
  header = TRUE,
  check.names = TRUE,
  stringsAsFactors = TRUE,
  na.strings = c("Not Available", "")
)
head(complications_df)

# Demographic and reporting-period columns that are removed before analysis.
zdemographics <- c(
  "Hospital.Name", "Address", "City", "State", "ZIP.Code", "County.Name",
  "Phone.Number", "Measure.Start.Date", "Measure.End.Date"
)
zdemogrphic_vars <- which(names(complications_df) %in% zdemographics)
# Drop the columns with a logical mask instead of `-which(...)`: a negative
# index built from an empty match would select zero columns rather than
# leaving the data frame untouched.
complications_df_cleaned <- complications_df[, !(names(complications_df) %in% zdemographics)]
head(complications_df_cleaned)
# Percentage share of each Compared.to.National level; about 35% are NA.
round(prop.table(summary(factor(complications_df_cleaned$Compared.to.National))) * 100, 2)
##       Better than the National Rate No Different than the National Rate 
##                                0.79                               56.30 
##           Number of Cases Too Small        Worse than the National Rate 
##                                6.54                                1.64 
##                                NA's 
##                               34.74
round(prop.table(summary(factor(complications_df_cleaned$Footnote)))*100,2)  #- NAs are about 59% in the Footnote variable
##                                                                                            1 - The number of cases/patients is too few to report. 
##                                                                                                                                              6.54 
##                                                                                      13 - Results cannot be calculated for this reporting period. 
##                                                                                                                                             24.70 
## 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs. 
##                                                                                                                                              3.38 
##                                                                                              4 - Data suppressed by CMS for one or more quarters. 
##                                                                                                                                              1.03 
##                                                                                          5 - Results are not available for this reporting period. 
##                                                                                                                                              3.68 
##                                                                                                   7 - No cases met the criteria for this measure. 
##                                                                                                                                              1.94 
##                                                                                                                                              NA's 
##                                                                                                                                             58.73

imputation of NAs in Complications dataset

  • Analysis of “footnote” variable by checking the impact on the score variable
# Row count, mean Score and total Score for each Footnote level,
# ordered by ascending mean Score.
complications_df_cleaned %>%
  group_by(Footnote) %>%
  summarise(
    cnt_rows = n(),
    Avg_Score = mean(Score, na.rm = TRUE),
    Total_Score = sum(Score, na.rm = TRUE)
  ) %>%
  arrange(Avg_Score)
  • Footnote variable: rows whose Footnote gives a specific reason do not contribute to the Score variable for that Provider.ID, so these rows can be excluded from the analysis.
# Keep only the rows whose Footnote is blank (NA) and drop the Footnote column.
# The row mask is taken from the same frame that is subset, so the mask and the
# data cannot drift apart if the raw frame is ever filtered separately.
zvar1 <- is.na(complications_df_cleaned$Footnote) # Blank Footnotes
zvar2 <- which(names(complications_df_cleaned) %in% "Footnote")
# A logical column mask is used instead of `-zvar2`: with no match, a negative
# empty index would select zero columns instead of all of them.
complications_df_cleaned_footnote_nas <-
  complications_df_cleaned[zvar1, names(complications_df_cleaned) != "Footnote"]

summary(complications_df_cleaned_footnote_nas)
##   Provider.ID                                               Measure.Name  
##  Min.   : 10001   Serious complications                           : 3224  
##  1st Qu.:110105   Collapsed lung due to medical treatment         : 3202  
##  Median :260017   Accidental cuts and tears from medical treatment: 3199  
##  Mean   :262101   Infections from a large venous catheter         : 3191  
##  3rd Qu.:390116   Pressure sores                                  : 3078  
##  Max.   :670106   Serious blood clots after surgery               : 2984  
##                   (Other)                                         :12246  
##                      Measure.ID   
##  PSI_90_SAFETY            : 3224  
##  PSI_6_IAT_PTX            : 3202  
##  PSI_15_ACC_LAC           : 3199  
##  PSI_7_CVCBI              : 3191  
##  PSI_3_ULCER              : 3078  
##  PSI_12_POSTOP_PULMEMB_DVT: 2984  
##  (Other)                  :12246  
##                           Compared.to.National  Denominator   
##  Better than the National Rate      :  419     Min.   :   25  
##  No Different than the National Rate:29837     1st Qu.:  163  
##  Number of Cases Too Small          :    0     Median :  697  
##  Worse than the National Rate       :  868     Mean   : 2260  
##                                                3rd Qu.: 2713  
##                                                Max.   :75770  
##                                                NA's   :3224   
##      Score         Lower.Estimate   Higher.Estimate 
##  Min.   :  0.030   Min.   :  0.00   Min.   :  0.06  
##  1st Qu.:  0.330   1st Qu.:  0.00   1st Qu.:  0.71  
##  Median :  1.065   Median :  0.18   Median :  1.88  
##  Mean   : 10.035   Mean   :  6.11   Mean   : 14.11  
##  3rd Qu.:  3.200   3rd Qu.:  1.38   3rd Qu.:  5.33  
##  Max.   :212.160   Max.   :175.29   Max.   :249.04  
## 
  • Imputing the “NAs” resulted in inducing the variability in each of the variables.

which variable is contributing most NAs in “Score” variable?

library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Per-measure row count, total Score and mean Score, melted to long form and
# drawn as one bar panel per statistic.
complications_df_cleaned_footnote_nas %>%
  group_by(Measure.ID) %>%
  summarise(
    cnt_rws = n(),
    Sum_score = sum(Score, na.rm = TRUE),
    mean_Score = mean(Score, na.rm = TRUE)
  ) %>%
  arrange(desc(cnt_rws)) %>%
  melt(value.name = "value") %>%
  ggplot(aes(x = Measure.ID, y = value)) +
  geom_col() +
  facet_wrap(facets = ~ variable, scales = "free", ncol = 3) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
## Using Measure.ID as id variables

# Profile the rows that have no Denominator.
summary(
  complications_df_cleaned_footnote_nas[
    is.na(complications_df_cleaned_footnote_nas$Denominator),
  ]
)
##   Provider.ID    
##  Min.   : 10001  
##  1st Qu.:110110  
##  Median :260024  
##  Mean   :263357  
##  3rd Qu.:390148  
##  Max.   :670106  
##                  
##                                                            Measure.Name 
##  Serious complications                                           :3224  
##  A wound that splits open  after surgery on the abdomen or pelvis:   0  
##  Accidental cuts and tears from medical treatment                :   0  
##  Blood stream infection after surgery                            :   0  
##  Broken hip from a fall after surgery                            :   0  
##  Collapsed lung due to medical treatment                         :   0  
##  (Other)                                                         :   0  
##                      Measure.ID  
##  PSI_90_SAFETY            :3224  
##  COMP_HIP_KNEE            :   0  
##  PSI_12_POSTOP_PULMEMB_DVT:   0  
##  PSI_13_POST_SEPSIS       :   0  
##  PSI_14_POSTOP_DEHIS      :   0  
##  PSI_15_ACC_LAC           :   0  
##  (Other)                  :   0  
##                           Compared.to.National  Denominator  
##  Better than the National Rate      : 101      Min.   : NA   
##  No Different than the National Rate:2937      1st Qu.: NA   
##  Number of Cases Too Small          :   0      Median : NA   
##  Worse than the National Rate       : 186      Mean   :NaN   
##                                                3rd Qu.: NA   
##                                                Max.   : NA   
##                                                NA's   :3224  
##      Score       Lower.Estimate   Higher.Estimate
##  Min.   :0.440   Min.   :0.1500   Min.   :0.660  
##  1st Qu.:0.790   1st Qu.:0.3900   1st Qu.:1.110  
##  Median :0.875   Median :0.4900   Median :1.250  
##  Mean   :0.890   Mean   :0.5378   Mean   :1.242  
##  3rd Qu.:0.960   3rd Qu.:0.6400   3rd Qu.:1.370  
##  Max.   :2.140   Max.   :1.9400   Max.   :2.460  
## 
# Reshape to one row per provider with one Score column per Measure.ID.
zvar1 <- c("Provider.ID", "Measure.ID", "Score")
zvar2 <- which(is.element(names(complications_df_cleaned_footnote_nas), zvar1))

complications_df_final <- spread(
  data = complications_df_cleaned_footnote_nas[, zvar2],
  key = Measure.ID,
  value = Score
)
summary(complications_df_final)
##   Provider.ID     COMP_HIP_KNEE   PSI_12_POSTOP_PULMEMB_DVT
##  Min.   : 10001   Min.   :1.500   Min.   : 1.390           
##  1st Qu.:110234   1st Qu.:2.700   1st Qu.: 3.870           
##  Median :260063   Median :3.000   Median : 4.750           
##  Mean   :264984   Mean   :3.042   Mean   : 5.035           
##  3rd Qu.:390146   3rd Qu.:3.400   3rd Qu.: 5.763           
##  Max.   :670106   Max.   :6.000   Max.   :20.880           
##                   NA's   :770     NA's   :500              
##  PSI_13_POST_SEPSIS PSI_14_POSTOP_DEHIS PSI_15_ACC_LAC   PSI_3_ULCER     
##  Min.   : 4.50      Min.   :1.180       Min.   :0.320   Min.   : 0.0300  
##  1st Qu.: 9.04      1st Qu.:2.130       1st Qu.:1.140   1st Qu.: 0.2100  
##  Median : 9.78      Median :2.230       Median :1.380   Median : 0.3400  
##  Mean   :10.25      Mean   :2.319       Mean   :1.440   Mean   : 0.4516  
##  3rd Qu.:11.26      3rd Qu.:2.470       3rd Qu.:1.665   3rd Qu.: 0.4600  
##  Max.   :27.96      Max.   :4.980       Max.   :6.180   Max.   :10.3500  
##  NA's   :1176       NA's   :944         NA's   :285     NA's   :406      
##  PSI_4_SURG_COMP  PSI_6_IAT_PTX     PSI_7_CVCBI     PSI_8_POST_HIP
##  Min.   : 70.79   Min.   :0.1900   Min.   :0.0300   Min.   :0.06  
##  1st Qu.:124.44   1st Qu.:0.3600   1st Qu.:0.1300   1st Qu.:0.06  
##  Median :135.57   Median :0.4000   Median :0.1600   Median :0.06  
##  Mean   :136.76   Mean   :0.4067   Mean   :0.1694   Mean   :0.06  
##  3rd Qu.:148.13   3rd Qu.:0.4400   3rd Qu.:0.1700   3rd Qu.:0.06  
##  Max.   :212.16   Max.   :0.8800   Max.   :1.2300   Max.   :0.06  
##  NA's   :1666     NA's   :282      NA's   :293      NA's   :618   
##  PSI_90_SAFETY  
##  Min.   :0.440  
##  1st Qu.:0.790  
##  Median :0.875  
##  Mean   :0.890  
##  3rd Qu.:0.960  
##  Max.   :2.140  
##  NA's   :260
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Scatter-plot matrix of every column from the second one onwards
# (the first column is skipped).
pairs.panels(
  x = complications_df_final[seq(2, ncol(complications_df_final))],
  bg = rainbow(12),
  smooth = TRUE,
  ellipses = TRUE,
  pch = 21,
  cex.cor = 0.05,
  cex.labels = 0.5
)
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in min(diff(breaks)): no non-missing arguments to min; returning
## Inf
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero

# Correlation plot of all columns except the first, with the coefficients
# printed and the legend hidden.
cor.plot(
  complications_df_final[, -1],
  numbers = TRUE,
  show.legend = FALSE,
  cex = 0.7,
  xlas = 2,
  cex.axis = 0.7
)
## Warning in cor(x, use = use, method = method): the standard deviation is
## zero

# Retain the provider key plus the three selected measures.
zvar1 <- which(
  is.element(
    names(complications_df_final),
    c("Provider.ID", "PSI_90_SAFETY", "COMP_HIP_KNEE", "PSI_4_SURG_COMP")
  )
)
complications_df_final <- complications_df_final[, zvar1]

Analysis for Healthcare Associated Infections - For Hospital

# Load the hospital-level Healthcare Associated Infections measures;
# "Not Available" and empty strings are read as NA.
# The path is relative to the project directory, like the other "ValidFiles/"
# reads in this script, so the analysis does not depend on one user's home
# folder. Strings are converted to factors explicitly because R >= 4.0 no
# longer does so by default and the summaries below expect factor columns.
hai_df <- read.csv(
  file = "ValidFiles/Healthcare Associated Infections - Hospital.csv",
  na.strings = c("Not Available", ""),
  stringsAsFactors = TRUE
)
head(hai_df)
summary(hai_df)
##   Provider.ID                             Hospital.Name   
##  Min.   : 10001   MEMORIAL HOSPITAL              :   720  
##  1st Qu.:140185   COMMUNITY MEMORIAL HOSPITAL    :   384  
##  Median :260037   GOOD SAMARITAN HOSPITAL        :   336  
##  Mean   :267984   SHRINERS HOSPITALS FOR CHILDREN:   336  
##  3rd Qu.:390211   ST JOSEPH HOSPITAL             :   336  
##  Max.   :670112   MEMORIAL MEDICAL CENTER        :   288  
##                   (Other)                        :228864  
##                      Address                 City            State       
##  100 HOSPITAL DRIVE      :   336   CHICAGO     :  1392   TX     : 19536  
##  100 MEDICAL CENTER DRIVE:   192   HOUSTON     :  1248   CA     : 16560  
##  200 HOSPITAL DRIVE      :   192   LOS ANGELES :   960   FL     :  8976  
##  ONE HOSPITAL DRIVE      :   144   DALLAS      :   912   IL     :  8592  
##  1 HOSPITAL DRIVE        :    96   PHILADELPHIA:   912   NY     :  8352  
##  1 MEDICAL CENTER DRIVE  :    96   COLUMBUS    :   864   PA     :  8160  
##  (Other)                 :230208   (Other)     :224976   (Other):161088  
##     ZIP.Code          County.Name      Phone.Number      
##  Min.   :  603   LOS ANGELES:  3984   Min.   :9.369e+08  
##  1st Qu.:33013   COOK       :  2448   1st Qu.:4.018e+09  
##  Median :55396   JEFFERSON  :  2256   Median :6.053e+09  
##  Mean   :53956   MONTGOMERY :  2112   Mean   :5.849e+09  
##  3rd Qu.:76020   WASHINGTON :  2112   3rd Qu.:7.878e+09  
##  Max.   :99929   (Other)    :217632   Max.   :9.899e+09  
##                  NA's       :   720                      
##                                                                         Measure.Name   
##  C.diff Lower Confidence Limit                                                :  4818  
##  C.diff Observed Cases                                                        :  4818  
##  C.diff Patient Days                                                          :  4818  
##  C.diff Predicted Cases                                                       :  4818  
##  C.diff Upper Confidence Limit                                                :  4818  
##  Catheter-associated urinary tract infections (CAUTI) in ICUs and select wards:  4818  
##  (Other)                                                                      :202356  
##            Measure.ID                               Compared.to.National
##  HAI_1_CI_LOWER :  4818   Better than the National Benchmark  :  3842   
##  HAI_1_CI_UPPER :  4818   No Different than National Benchmark: 12884   
##  HAI_1_DOPC_DAYS:  4818   Worse than the National Benchmark   :   808   
##  HAI_1_ELIGCASES:  4818   NA's                                :213730   
##  HAI_1_NUMERATOR:  4818                                                 
##  HAI_1_SIR      :  4818                                                 
##  (Other)        :202356                                                 
##      Score         
##  Min.   :     0.0  
##  1st Qu.:     0.4  
##  Median :     1.5  
##  Mean   :  2745.3  
##  3rd Qu.:    14.1  
##  Max.   :702243.0  
##  NA's   :101886    
##                                                                                                                                               Footnote     
##  5 - Results are not available for this reporting period.                                                                                         : 33490  
##  12 - This measure does not apply to this hospital for this reporting period.                                                                     : 29598  
##  13 - Results cannot be calculated for this reporting period.                                                                                     : 26307  
##  19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.:  7920  
##  8 - The lower limit of the confidence interval cannot be calculated if the number of observed infections equals zero.                            :  3302  
##  (Other)                                                                                                                                          :  2756  
##  NA's                                                                                                                                             :127891  
##  Measure.Start.Date   Measure.End.Date 
##  1/1/2015:231264    12/31/2015:231264  
##                                        
##                                        
##                                        
##                                        
##                                        
## 
# Demographic and reporting-period columns that are removed before analysis.
zdemographics <- c(
  "Hospital.Name", "Address", "City", "State", "County.Name", "Phone.Number",
  "ZIP.Code", "Measure.Start.Date", "Measure.End.Date"
)
# Drop them with a logical mask instead of `-which(...)`: a negative index
# built from an empty match would select zero columns rather than all columns.
hai_df_cleaned <- hai_df[, !(names(hai_df) %in% zdemographics)]
str(hai_df_cleaned)
## 'data.frame':    231264 obs. of  6 variables:
##  $ Provider.ID         : int  10001 10001 10001 10001 10001 10001 10001 10001 10001 10001 ...
##  $ Measure.Name        : Factor w/ 48 levels "C.diff Lower Confidence Limit",..: 25 29 26 28 27 19 21 24 20 23 ...
##  $ Measure.ID          : Factor w/ 48 levels "HAI_1_CI_LOWER",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Compared.to.National: Factor w/ 3 levels "Better than the National Benchmark",..: NA NA NA NA NA 3 NA NA NA NA ...
##  $ Score               : num  1.03 2.77 7117 9.2 16 ...
##  $ Footnote            : Factor w/ 8 levels "12 - This measure does not apply to this hospital for this reporting period.",..: NA NA NA NA NA NA NA NA NA NA ...
# Column-wise summary of the HAI data after the demographic columns were dropped.
summary(hai_df_cleaned)
##   Provider.ID    
##  Min.   : 10001  
##  1st Qu.:140185  
##  Median :260037  
##  Mean   :267984  
##  3rd Qu.:390211  
##  Max.   :670112  
##                  
##                                                                         Measure.Name   
##  C.diff Lower Confidence Limit                                                :  4818  
##  C.diff Observed Cases                                                        :  4818  
##  C.diff Patient Days                                                          :  4818  
##  C.diff Predicted Cases                                                       :  4818  
##  C.diff Upper Confidence Limit                                                :  4818  
##  Catheter-associated urinary tract infections (CAUTI) in ICUs and select wards:  4818  
##  (Other)                                                                      :202356  
##            Measure.ID                               Compared.to.National
##  HAI_1_CI_LOWER :  4818   Better than the National Benchmark  :  3842   
##  HAI_1_CI_UPPER :  4818   No Different than National Benchmark: 12884   
##  HAI_1_DOPC_DAYS:  4818   Worse than the National Benchmark   :   808   
##  HAI_1_ELIGCASES:  4818   NA's                                :213730   
##  HAI_1_NUMERATOR:  4818                                                 
##  HAI_1_SIR      :  4818                                                 
##  (Other)        :202356                                                 
##      Score         
##  Min.   :     0.0  
##  1st Qu.:     0.4  
##  Median :     1.5  
##  Mean   :  2745.3  
##  3rd Qu.:    14.1  
##  Max.   :702243.0  
##  NA's   :101886    
##                                                                                                                                               Footnote     
##  5 - Results are not available for this reporting period.                                                                                         : 33490  
##  12 - This measure does not apply to this hospital for this reporting period.                                                                     : 29598  
##  13 - Results cannot be calculated for this reporting period.                                                                                     : 26307  
##  19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.:  7920  
##  8 - The lower limit of the confidence interval cannot be calculated if the number of observed infections equals zero.                            :  3302  
##  (Other)                                                                                                                                          :  2756  
##  NA's                                                                                                                                             :127891
# Row count and total Score for each Footnote level.
hai_df_cleaned %>%
  group_by(Footnote) %>%
  summarise(count_rows = n(), score_total = sum(Score, na.rm = TRUE))

# Keep only the rows without a footnote, then remove the Footnote column.
# The column is dropped with a logical mask instead of `-zvar1`: with no
# match, a negative empty index would select zero columns instead of all.
zvar1 <- which(names(hai_df_cleaned) %in% c("Footnote"))
hai_df_cleaned <-
  hai_df_cleaned[is.na(hai_df_cleaned$Footnote), names(hai_df_cleaned) != "Footnote"]

summary(hai_df_cleaned)
##   Provider.ID                                     Measure.Name   
##  Min.   : 10001   CAUTI: Number of Urinary Catheter Days:  3641  
##  1st Qu.:110082   CAUTI: Observed Cases                 :  3641  
##  Median :250050   CAUTI: Predicted Cases                :  3641  
##  Mean   :259546   C.diff Patient Days                   :  3595  
##  3rd Qu.:390096   C.diff Observed Cases                 :  3594  
##  Max.   :670103   C.diff Predicted Cases                :  3594  
##                   (Other)                               :106185  
##            Measure.ID                               Compared.to.National
##  HAI_2_DOPC_DAYS:  3641   Better than the National Benchmark  :  3828   
##  HAI_2_ELIGCASES:  3641   No Different than National Benchmark: 12833   
##  HAI_2_NUMERATOR:  3641   Worse than the National Benchmark   :   807   
##  HAI_6_DOPC_DAYS:  3595   NA's                                :110423   
##  HAI_6_ELIGCASES:  3594                                                 
##  HAI_6_NUMERATOR:  3594                                                 
##  (Other)        :106185                                                 
##      Score         
##  Min.   :     0.0  
##  1st Qu.:     0.4  
##  Median :     1.6  
##  Mean   :  2770.2  
##  3rd Qu.:    14.1  
##  Max.   :702243.0  
## 

determine the outliers in the Score

# Box plot of Score for each Measure.ID, used to look for outliers.
# The x aesthetic names the column directly instead of using
# `hai_df_cleaned$Measure.ID`: aesthetics are evaluated inside `data`, and a
# `$` reference bypasses that, which breaks facets, layer-specific data and
# any later reuse of the plot with a different data frame.
ggplot(data = hai_df_cleaned, aes(x = Measure.ID, y = Score)) +
  geom_boxplot() +
  xlab("Measure.ID") +
  theme(axis.text.x = element_text(angle = 90))

# Row count for every Measure.Name / Measure.ID pair in the raw HAI data.
hai_df %>%
  group_by(Measure.Name, Measure.ID) %>%
  summarise(cnt_rws = n())

-Clostridium difficile (C. difficile) is a bacterium that causes diarrhea and can lead to serious complications. Those at highest risk for C. difficile infection include people who take antibiotics and also receive care in any medical setting, including hospitals. C. difficile bacteria produce spores that can be spread from patient to patient. Symptoms from C. diff infections often take a few days to develop. Patients are tested for C. diff infections if they show signs of illness while in the hospital. This measure compares the number of stool specimens that tested positive for C. diff toxin four or more days after the patient entered the hospital to a national benchmark.

HAI_1_SIR, HAI_1a_SIR, HAI_2_SIR, HAI_2a_SIR, HAI_6_SIR,HAI_4_SIR, HAI_5_SIR, HAI_3_SIR

HAI-1 measure tracks central-line associated bloodstream infections (CLABSI) in ICUs and select wards. HAI-2 measure tracks catheter-associated urinary tract infections (CAUTI) in ICUs and select wards. HAI-3 Surgical Site Infection from colon surgery (SSI: Colon) HAI-4 Surgical Site Infection from abdominal hysterectomy (SSI: Hysterectomy) HAI-5 Methicillin-resistant Staphylococcus Aureus (MRSA) Blood Laboratory-identified Events (Bloodstream infections) HAI-6 Clostridium difficile (C.diff.) Laboratory-identified Events (Intestinal infections)

# The six HAI_<n>_SIR measure IDs retained for the analysis.
hai_measures <- paste0("HAI_", 1:6, "_SIR")

# Filtering the measure IDs useful for analysis
hai_df_cleaned <- hai_df_cleaned[hai_df_cleaned$Measure.ID %in% hai_measures, ]

# Box plot of Score for each retained measure.
ggplot(data = hai_df_cleaned, aes(x = Measure.ID, y = Score)) +
  geom_boxplot(na.rm = TRUE) +
  xlab("Measure.ID") +
  theme(axis.text.x = element_text(angle = 90))

# Stacked columns of Score for each retained measure.
ggplot(data = hai_df_cleaned, aes(x = Measure.ID, y = Score)) +
  geom_col(position = position_stack(), na.rm = TRUE) +
  xlab("Measure.ID") +
  labs(title = "Hospital Associated Infections by Measure and Score") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

# Number of rows for each retained measure.
ggplot(data = hai_df_cleaned, aes(x = Measure.ID)) +
  geom_bar(position = position_stack(), na.rm = TRUE) +
  xlab("Measure.ID") +
  labs(title = "Hospital Associated Infections by Measure by Hospital Count") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

# Reshape to one row per provider with one Score column per Measure.ID.
zvar1 <- c("Provider.ID", "Measure.ID", "Score")
zvar2 <- which(is.element(names(hai_df_cleaned), zvar1))

hai_df_final <- spread(
  data = hai_df_cleaned[, zvar2],
  key = "Measure.ID",
  value = "Score"
)
summary(hai_df_final)
##   Provider.ID       HAI_1_SIR        HAI_2_SIR        HAI_3_SIR     
##  Min.   : 10001   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:110200   1st Qu.:0.2070   1st Qu.:0.1270   1st Qu.:0.3998  
##  Median :250110   Median :0.4640   Median :0.4205   Median :0.8325  
##  Mean   :262973   Mean   :0.5408   Mean   :0.4787   Mean   :0.9642  
##  3rd Qu.:390155   3rd Qu.:0.7465   3rd Qu.:0.7008   3rd Qu.:1.4085  
##  Max.   :670098   Max.   :5.1900   Max.   :7.0140   Max.   :5.2160  
##                   NA's   :943      NA's   :432      NA's   :1270    
##    HAI_4_SIR        HAI_5_SIR        HAI_6_SIR     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.4280   1st Qu.:0.4920  
##  Median :0.6985   Median :0.8240   Median :0.7985  
##  Mean   :0.8706   Mean   :0.9892   Mean   :0.8170  
##  3rd Qu.:1.3390   3rd Qu.:1.3800   3rd Qu.:1.1140  
##  Max.   :4.8190   Max.   :7.0170   Max.   :3.7410  
##  NA's   :2454     NA's   :1485     NA's   :64

Creating a Patient Safety group

# Full outer join of the ratings with the complications measures, keyed on
# whatever column names the two tables share.
gen_inf_compli_df <- merge(
  x = general_info_final,
  y = complications_df_final,
  by = intersect(names(general_info_final), names(complications_df_final)),
  all = TRUE
)

# Full outer join of that result with the HAI measures, again on shared names.
safety_of_care_group <- merge(
  x = gen_inf_compli_df,
  y = hai_df_final,
  by = intersect(names(gen_inf_compli_df), names(hai_df_final)),
  all = TRUE
)

head(safety_of_care_group)

ANALYSIS OF HCAHPS SURVEY

# Load the HCAHPS survey results; "Not Available", "Not Applicable" and empty
# strings are read as NA. Strings are converted to factors explicitly because
# R >= 4.0 no longer does so by default and the code below calls levels() on
# HCAHPS.Measure.ID.
hcaphs_df <- read.csv(
  file = "ValidFiles/HCAHPS - Hospital.csv",
  header = TRUE,
  check.names = TRUE,
  na.strings = c("Not Available", "Not Applicable", ""),
  stringsAsFactors = TRUE
)

Removing the demographic columns and the Measure Start Date and Measure End Date columns

# Print the column names held in zdemographics.
zdemographics
## [1] "Hospital.Name"      "Address"            "City"              
## [4] "State"              "County.Name"        "Phone.Number"      
## [7] "ZIP.Code"           "Measure.Start.Date" "Measure.End.Date"
# Drop the columns listed in zdemographics. A logical mask is used instead of
# `-which(...)`: a negative index built from an empty match would select zero
# columns rather than leaving the data frame untouched.
hcaphs_df_cleaned <- hcaphs_df[, !(names(hcaphs_df) %in% zdemographics)]

H_COMP_1_LINEAR_SCORE, H_COMP_2_LINEAR_SCORE, H_COMP_3_LINEAR_SCORE, H_COMP_4_LINEAR_SCORE, H_COMP_5_LINEAR_SCORE, H_COMP_6_LINEAR_SCORE, H_COMP_7_LINEAR_SCORE, H_HSP_RATING_LINEAR_SCORE, H_QUIET_LINEAR_SCORE,H_RECMND_LINEAR_SCORE, H_CLEAN_LINEAR_SCORE,

H_COMP_1, H_COMP_2, H_COMP_3, H_COMP_4, H_COMP_5, H_COMP_6, H_COMP_7, H_HSP_RATING, H_QUIET,H_RECMND, H_CLEAN_HSP

# List the levels of the HCAHPS.Measure.ID factor in the raw survey data.
levels(hcaphs_df$HCAHPS.Measure.ID)
##  [1] "H_CLEAN_HSP_A_P"           "H_CLEAN_HSP_SN_P"         
##  [3] "H_CLEAN_HSP_U_P"           "H_CLEAN_LINEAR_SCORE"     
##  [5] "H_CLEAN_STAR_RATING"       "H_COMP_1_A_P"             
##  [7] "H_COMP_1_LINEAR_SCORE"     "H_COMP_1_SN_P"            
##  [9] "H_COMP_1_STAR_RATING"      "H_COMP_1_U_P"             
## [11] "H_COMP_2_A_P"              "H_COMP_2_LINEAR_SCORE"    
## [13] "H_COMP_2_SN_P"             "H_COMP_2_STAR_RATING"     
## [15] "H_COMP_2_U_P"              "H_COMP_3_A_P"             
## [17] "H_COMP_3_LINEAR_SCORE"     "H_COMP_3_SN_P"            
## [19] "H_COMP_3_STAR_RATING"      "H_COMP_3_U_P"             
## [21] "H_COMP_4_A_P"              "H_COMP_4_LINEAR_SCORE"    
## [23] "H_COMP_4_SN_P"             "H_COMP_4_STAR_RATING"     
## [25] "H_COMP_4_U_P"              "H_COMP_5_A_P"             
## [27] "H_COMP_5_LINEAR_SCORE"     "H_COMP_5_SN_P"            
## [29] "H_COMP_5_STAR_RATING"      "H_COMP_5_U_P"             
## [31] "H_COMP_6_LINEAR_SCORE"     "H_COMP_6_N_P"             
## [33] "H_COMP_6_STAR_RATING"      "H_COMP_6_Y_P"             
## [35] "H_COMP_7_A"                "H_COMP_7_D_SD"            
## [37] "H_COMP_7_LINEAR_SCORE"     "H_COMP_7_SA"              
## [39] "H_COMP_7_STAR_RATING"      "H_HSP_RATING_0_6"         
## [41] "H_HSP_RATING_7_8"          "H_HSP_RATING_9_10"        
## [43] "H_HSP_RATING_LINEAR_SCORE" "H_HSP_RATING_STAR_RATING" 
## [45] "H_QUIET_HSP_A_P"           "H_QUIET_HSP_SN_P"         
## [47] "H_QUIET_HSP_U_P"           "H_QUIET_LINEAR_SCORE"     
## [49] "H_QUIET_STAR_RATING"       "H_RECMND_DN"              
## [51] "H_RECMND_DY"               "H_RECMND_LINEAR_SCORE"    
## [53] "H_RECMND_PY"               "H_RECMND_STAR_RATING"     
## [55] "H_STAR_RATING"
# Measure IDs (linear scores) that are kept for the analysis.
hcaphs_measures <- c(
  "H_COMP_1_LINEAR_SCORE",
  "H_COMP_2_LINEAR_SCORE",
  "H_COMP_3_LINEAR_SCORE",
  "H_COMP_4_LINEAR_SCORE",
  "H_COMP_5_LINEAR_SCORE",
  "H_COMP_6_LINEAR_SCORE",
  "H_COMP_7_LINEAR_SCORE",
  "H_HSP_RATING_LINEAR_SCORE",
  "H_QUIET_LINEAR_SCORE",
  "H_RECMND_LINEAR_SCORE",
  "H_CLEAN_LINEAR_SCORE"
)

# Keep only the rows whose measure ID is one of the selected linear scores.
hcaphs_df_measures <- filter(hcaphs_df_cleaned, HCAHPS.Measure.ID %in% hcaphs_measures)

# Remove the "_LINEAR_SCORE" suffix from the measure IDs and rebuild the
# factor from the shortened strings.
hcaphs_df_measures$HCAHPS.Measure.ID <- as.factor(
  str_replace(
    string = as.character(hcaphs_df_measures$HCAHPS.Measure.ID),
    pattern = "_LINEAR_SCORE",
    replacement = ""
  )
)

# Columns inspected when looking at how missing values are distributed.
zdistri_vars <- c(
  "Provider.ID",
  "HCAHPS.Measure.ID",
  "HCAHPS.Linear.Mean.Value",
  "Number.of.Completed.Surveys",
  "Survey.Response.Rate.Percent.Footnote",
  "Number.of.Completed.Surveys.Footnote"
)

hcaphs_df_measures_distri <- hcaphs_df_measures[, zdistri_vars]

# Summarise the rows that have no linear mean value.
summary(
  hcaphs_df_measures_distri[is.na(hcaphs_df_measures_distri$HCAHPS.Linear.Mean.Value), ]
)
##   Provider.ID     HCAHPS.Measure.ID HCAHPS.Linear.Mean.Value
##  Min.   : 10008   H_CLEAN :1310     Min.   : NA             
##  1st Qu.:161366   H_COMP_1:1310     1st Qu.: NA             
##  Median :271317   H_COMP_2:1310     Median : NA             
##  Mean   :284193   H_COMP_3:1310     Mean   :NaN             
##  3rd Qu.:400130   H_COMP_4:1310     3rd Qu.: NA             
##  Max.   :670112   H_COMP_5:1310     Max.   : NA             
##                   (Other) :6550     NA's   :14410           
##  Number.of.Completed.Surveys
##  FEWER THAN 50:3971         
##  54           : 165         
##  80           : 132         
##  58           : 121         
##  52           : 110         
##  (Other)      :3564         
##  NA's         :6347         
##                                                                                                                                                                                                                      Survey.Response.Rate.Percent.Footnote
##  5 - Results are not available for this reporting period.                                                                                                                                                                               :4235             
##  6 - Fewer than 100 patients completed the HCAHPS survey. Use these scores with caution, as the number of surveys may be too low to reliably assess hospital performance.                                                               :3916             
##  10 - Very few patients were eligible for the HCAHPS survey. The scores shown reflect fewer than 50 completed surveys. Use these scores with caution, as the number of surveys may be too low to reliably assess hospital performance.  :3850             
##  19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.                                                                                      :1815             
##  1 - The number of cases/patients is too few to report.                                                                                                                                                                                 : 297             
##  6 - Fewer than 100 patients completed the HCAHPS survey. Use these scores with caution, as the number of surveys may be too low to reliably assess hospital performance., 11 - There were discrepancies in the data collection process.: 143             
##  (Other)                                                                                                                                                                                                                                : 154             
##                                                                                                                                 Number.of.Completed.Surveys.Footnote
##  1 - The number of cases/patients is too few to report.                                                                                           : 297             
##  19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.:1815             
##  5 - Results are not available for this reporting period.                                                                                         :4235             
##  NA's                                                                                                                                             :8063             
##                                                                                                                                                                     
##                                                                                                                                                                     
## 
# Summarise the rows that carry no survey-response-rate footnote.
summary(
  hcaphs_df_measures_distri[
    is.na(hcaphs_df_measures_distri$Survey.Response.Rate.Percent.Footnote),
  ]
)
##   Provider.ID     HCAHPS.Measure.ID HCAHPS.Linear.Mean.Value
##  Min.   : 10001   H_CLEAN : 3441    Min.   : 58.00          
##  1st Qu.:130028   H_COMP_1: 3441    1st Qu.: 83.00          
##  Median :250084   H_COMP_2: 3441    Median : 87.00          
##  Mean   :262769   H_COMP_3: 3441    Mean   : 86.41          
##  3rd Qu.:390100   H_COMP_4: 3441    3rd Qu.: 90.00          
##  Max.   :670103   H_COMP_5: 3441    Max.   :100.00          
##                   (Other) :17205                            
##  Number.of.Completed.Surveys
##  320    :  121              
##  322    :  110              
##  341    :  110              
##  104    :   99              
##  130    :   99              
##  138    :   99              
##  (Other):37213              
##                                                                                                                                                                                                                                                                                   Survey.Response.Rate.Percent.Footnote
##  1 - The number of cases/patients is too few to report.                                                                                                                                                                                                                                              :    0            
##  10 - Very few patients were eligible for the HCAHPS survey. The scores shown reflect fewer than 50 completed surveys. Use these scores with caution, as the number of surveys may be too low to reliably assess hospital performance.                                                               :    0            
##  10 - Very few patients were eligible for the HCAHPS survey. The scores shown reflect fewer than 50 completed surveys. Use these scores with caution, as the number of surveys may be too low to reliably assess hospital performance., 11 - There were discrepancies in the data collection process.:    0            
##  11 - There were discrepancies in the data collection process.                                                                                                                                                                                                                                       :    0            
##  19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.                                                                                                                                                   :    0            
##  (Other)                                                                                                                                                                                                                                                                                             :    0            
##  NA's                                                                                                                                                                                                                                                                                                :37851            
##                                                                                                                                 Number.of.Completed.Surveys.Footnote
##  1 - The number of cases/patients is too few to report.                                                                                           :    0            
##  19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.:    0            
##  5 - Results are not available for this reporting period.                                                                                         :    0            
##  NA's                                                                                                                                             :37851            
##                                                                                                                                                                     
##                                                                                                                                                                     
## 
# Keep the rows without a response-rate footnote and only the provider,
# measure ID and linear mean value columns (the first three of zdistri_vars).
hcaphs_df_measures_distri <- hcaphs_df_measures_distri[
  is.na(hcaphs_df_measures_distri$Survey.Response.Rate.Percent.Footnote),
  c("Provider.ID", "HCAHPS.Measure.ID", "HCAHPS.Linear.Mean.Value")
]

# Reshape to wide form: one row per provider, one column per measure.
patient_exp_group <- spread(
  hcaphs_df_measures_distri,
  key = HCAHPS.Measure.ID,
  value = HCAHPS.Linear.Mean.Value
)
summary(patient_exp_group)
##   Provider.ID        H_CLEAN         H_COMP_1         H_COMP_2     
##  Min.   : 10001   Min.   :71.00   Min.   : 73.00   Min.   : 75.00  
##  1st Qu.:130028   1st Qu.:85.00   1st Qu.: 90.00   1st Qu.: 90.00  
##  Median :250084   Median :87.00   Median : 92.00   Median : 92.00  
##  Mean   :262769   Mean   :87.31   Mean   : 91.34   Mean   : 91.91  
##  3rd Qu.:390100   3rd Qu.:90.00   3rd Qu.: 93.00   3rd Qu.: 93.00  
##  Max.   :670103   Max.   :99.00   Max.   :100.00   Max.   :100.00  
##     H_COMP_3       H_COMP_4         H_COMP_5        H_COMP_6    
##  Min.   :64.0   Min.   : 70.00   Min.   :58.00   Min.   :62.00  
##  1st Qu.:83.0   1st Qu.: 86.00   1st Qu.:76.00   1st Qu.:85.00  
##  Median :85.0   Median : 88.00   Median :79.00   Median :87.00  
##  Mean   :85.3   Mean   : 87.58   Mean   :78.78   Mean   :86.93  
##  3rd Qu.:88.0   3rd Qu.: 89.00   3rd Qu.:81.00   3rd Qu.:89.00  
##  Max.   :99.0   Max.   :100.00   Max.   :95.00   Max.   :99.00  
##     H_COMP_7      H_HSP_RATING      H_QUIET         H_RECMND     
##  Min.   :64.00   Min.   :65.00   Min.   :60.00   Min.   : 59.00  
##  1st Qu.:80.00   1st Qu.:87.00   1st Qu.:80.00   1st Qu.: 85.00  
##  Median :82.00   Median :89.00   Median :83.00   Median : 89.00  
##  Mean   :81.54   Mean   :88.74   Mean   :83.05   Mean   : 88.05  
##  3rd Qu.:83.00   3rd Qu.:91.00   3rd Qu.:86.00   3rd Qu.: 91.00  
##  Max.   :99.00   Max.   :99.00   Max.   :98.00   Max.   :100.00
# Pairwise panels for the patient-experience measures; the first column
# (Provider.ID) is left out.
pairs.panels(
  patient_exp_group[, -1],
  cex.labels = 0.6,
  cex.cor = 0.7,
  main = "Relationship between measures"
)

# Full outer join of the safety-of-care and patient-experience groups on
# whatever column names the two data frames share.
master_df <- merge(
  safety_of_care_group,
  patient_exp_group,
  by = intersect(names(safety_of_care_group), names(patient_exp_group)),
  all = TRUE
)
head(master_df)

Analysis of Timeliness and Effectiveness of Care Dataset

# Load the Timely and Effective Care dataset. "Not Available" and empty
# strings are read as NA, and character columns become factors.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved literal.
time_and_eff_care_df <- read.csv(
  file = "ValidFiles/Timely and Effective Care - Hospital.csv",
  header = TRUE,
  check.names = TRUE,
  stringsAsFactors = TRUE,
  na.strings = c("Not Available", "")
)
str(time_and_eff_care_df)
## 'data.frame':    207174 obs. of  16 variables:
##  $ Provider.ID       : int  10001 10001 10001 10001 10001 10001 10001 10001 10001 10001 ...
##  $ Hospital.Name     : Factor w/ 4617 levels "ABBEVILLE AREA MEDICAL CENTER",..: 3685 3685 3685 3685 3685 3685 3685 3685 3685 3685 ...
##  $ Address           : Factor w/ 4789 levels " CALLE CONCEPCION VERA AYALA #550 S",..: 468 468 468 468 468 468 468 468 468 468 ...
##  $ City              : Factor w/ 2949 levels "ABBEVILLE","ABERDEEN",..: 699 699 699 699 699 699 699 699 699 699 ...
##  $ State             : Factor w/ 56 levels "AK","AL","AR",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ ZIP.Code          : int  36301 36301 36301 36301 36301 36301 36301 36301 36301 36301 ...
##  $ County.Name       : Factor w/ 1565 levels "ABBEVILLE","ACADIA",..: 668 668 668 668 668 668 668 668 668 668 ...
##  $ Phone.Number      : num  3.35e+09 3.35e+09 3.35e+09 3.35e+09 3.35e+09 ...
##  $ Condition         : Factor w/ 11 levels "Blood Clot Prevention and Treatment",..: 5 5 2 4 4 4 6 9 9 5 ...
##  $ Measure.ID        : Factor w/ 43 levels "AMI_7a","AMI_8a",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Measure.Name      : Factor w/ 43 levels "Anticoagulation overlap therapy",..: 16 32 19 9 10 11 14 22 18 26 ...
##  $ Score             : Factor w/ 582 levels "0","1","10","100",..: NA 568 NA 193 519 579 4 575 549 NA ...
##  $ Sample            : int  NA 24 NA 535 535 NA 337 537 3791 NA ...
##  $ Footnote          : Factor w/ 13 levels "1 - The number of cases/patients is too few to report.",..: 13 10 12 6 6 NA 10 6 NA 11 ...
##  $ Measure.Start.Date: Factor w/ 5 levels "01/01/2014","01/01/2015",..: 2 2 2 2 2 1 2 4 5 2 ...
##  $ Measure.End.Date  : Factor w/ 5 levels "03/31/2015","03/31/2016",..: 5 3 5 5 5 4 3 1 2 5 ...

Cleaning the demographic variables from the dataset

# Positions of the demographic columns (those named in `zdemographics`).
zdemogrphic_vars <- which(names(time_and_eff_care_df) %in% zdemographics)

# Keep every column that is not demographic. setdiff() on the column
# positions is used instead of a negative index: with no matches,
# `-integer(0)` would select zero columns rather than all of them.
time_and_eff_care_cleaned <- time_and_eff_care_df[
  ,
  setdiff(seq_along(time_and_eff_care_df), zdemogrphic_vars),
  drop = FALSE
]
head(time_and_eff_care_cleaned)
summary(time_and_eff_care_cleaned)
##   Provider.ID                                   Condition    
##  Min.   : 10001   Emergency Department               :38544  
##  1st Qu.:140185   Stroke Care                        :38544  
##  Median :260037   Heart Attack or Chest Pain         :33726  
##  Mean   :267984   Blood Clot Prevention and Treatment:28908  
##  3rd Qu.:390211   Surgical Care Improvement Project  :28908  
##  Max.   :670112   Colonoscopy care                   : 9636  
##                   (Other)                            :28908  
##    Measure.ID    
##  AMI_7a :  4818  
##  AMI_8a :  4818  
##  CAC_3  :  4818  
##  ED_1b  :  4818  
##  ED_2b  :  4818  
##  EDV    :  4818  
##  (Other):178266  
##                                                   Measure.Name   
##  Anticoagulation overlap therapy                        :  4818  
##  Anticoagulation Therapy for Atrial Fibrillation/Flutter:  4818  
##  Antithrombotic Therapy by End of Hospital Day 2        :  4818  
##  Aspirin at Arrival                                     :  4818  
##  Assessed for Rehabilitation                            :  4818  
##  Discharged on Antithrombotic Therapy                   :  4818  
##  (Other)                                                :178266  
##      Score            Sample      
##  100    : 14414   Min.   :     0  
##  99     :  4502   1st Qu.:    45  
##  98     :  3912   Median :   118  
##  97     :  2918   Mean   :  2010  
##  0      :  2792   3rd Qu.:   405  
##  (Other): 49612   Max.   :506621  
##  NA's   :129024   NA's   :132305  
##                                                                                                                                               Footnote    
##  5 - Results are not available for this reporting period.                                                                                         :86714  
##  2 - Data submitted were based on a sample of cases/patients.                                                                                     :28612  
##  1 - The number of cases/patients is too few to report.                                                                                           : 9417  
##  19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.: 7095  
##  7 - No cases met the criteria for this measure.                                                                                                  : 6908  
##  (Other)                                                                                                                                          :21439  
##  NA's                                                                                                                                             :46989
# Count the rows per footnote value.
time_and_eff_care_cleaned %>% group_by(Footnote) %>% tally()

# Score was read as a factor, so it has to go through as.character() first:
# as.integer() applied directly to a factor returns the internal level codes
# (1..nlevels), not the numbers shown in the data. Score labels that are not
# numeric become NA, and R reports that with a coercion warning.
time_and_eff_care_cleaned$Score <- as.integer(
  as.character(time_and_eff_care_cleaned$Score)
)

# Peek at the rows that have no footnote.
time_and_eff_care_cleaned[is.na(time_and_eff_care_cleaned$Footnote), ] %>% head()

# Keep rows that either have no footnote or whose only footnote is the
# "based on a sample" note.
time_and_eff_care_imputed <- time_and_eff_care_cleaned[
  is.na(time_and_eff_care_cleaned$Footnote) |
    (time_and_eff_care_cleaned$Footnote %in%
       c("2 - Data submitted were based on a sample of cases/patients.")),
]
str(time_and_eff_care_imputed)
## 'data.frame':    75601 obs. of  7 variables:
##  $ Provider.ID : int  10001 10001 10001 10001 10001 10001 10001 10001 10001 10001 ...
##  $ Condition   : Factor w/ 11 levels "Blood Clot Prevention and Treatment",..: 4 4 4 9 9 4 4 4 4 3 ...
##  $ Measure.ID  : Factor w/ 43 levels "AMI_7a","AMI_8a",..: 4 5 6 8 9 11 13 14 15 18 ...
##  $ Measure.Name: Factor w/ 43 levels "Anticoagulation overlap therapy",..: 9 10 11 22 18 29 8 27 24 13 ...
##  $ Score       : int  193 519 579 575 549 112 509 578 337 550 ...
##  $ Sample      : int  535 535 NA 537 3791 371 377 111 58662 72 ...
##  $ Footnote    : Factor w/ 13 levels "1 - The number of cases/patients is too few to report.",..: 6 6 NA 6 NA NA NA NA NA NA ...
# Column-wise summary of the retained rows.
summary(time_and_eff_care_imputed)
##   Provider.ID                                   Condition    
##  Min.   : 10001   Emergency Department               :24437  
##  1st Qu.:120001   Stroke Care                        :13844  
##  Median :250123   Blood Clot Prevention and Treatment:12954  
##  Mean   :262222   Preventive Care                    : 7744  
##  3rd Qu.:390104   Colonoscopy care                   : 5358  
##  Max.   :670112   Heart Attack or Chest Pain         : 4785  
##                   (Other)                            : 6479  
##                   Measure.ID   
##  IMM_3_OP_27_FAC_ADHPCT: 4107  
##  IMM_2                 : 3637  
##  VTE_1                 : 3514  
##  ED_1b                 : 3452  
##  ED_2b                 : 3445  
##  OP_20                 : 3303  
##  (Other)               :54143  
##                                          Measure.Name       Score    
##  Healthcare workers given influenza vaccination: 4107   Min.   :  1  
##  Immunization for influenza                    : 3637   1st Qu.: 16  
##  Venous thromboembolism prophylaxis            : 3514   Median :482  
##  ED1                                           : 3452   Mean   :336  
##  ED2                                           : 3445   3rd Qu.:570  
##  Door to diagnostic eval                       : 3303   Max.   :582  
##  (Other)                                       :54143                
##      Sample      
##  Min.   :    11  
##  1st Qu.:    47  
##  Median :   123  
##  Mean   :  2078  
##  3rd Qu.:   409  
##  Max.   :506621  
##  NA's   :3283    
##                                                                                                                                                                                 Footnote    
##  2 - Data submitted were based on a sample of cases/patients.                                                                                                                       :28612  
##  1 - The number of cases/patients is too few to report.                                                                                                                             :    0  
##  1 - The number of cases/patients is too few to report., 2 - Data submitted were based on a sample of cases/patients.                                                               :    0  
##  1 - The number of cases/patients is too few to report., 2 - Data submitted were based on a sample of cases/patients., 3 - Results are based on a shorter time period than required.:    0  
##  1 - The number of cases/patients is too few to report., 3 - Results are based on a shorter time period than required.                                                              :    0  
##  (Other)                                                                                                                                                                            :    0  
##  NA's                                                                                                                                                                               :46989
# Discard the rows whose Sample value is missing, then summarise again.
time_and_eff_care_imputed <- subset(time_and_eff_care_imputed, !is.na(Sample))
summary(time_and_eff_care_imputed)
##   Provider.ID                                   Condition    
##  Min.   : 10001   Emergency Department               :21154  
##  1st Qu.:120001   Stroke Care                        :13844  
##  Median :250117   Blood Clot Prevention and Treatment:12954  
##  Mean   :262085   Preventive Care                    : 7744  
##  3rd Qu.:390102   Colonoscopy care                   : 5358  
##  Max.   :670112   Heart Attack or Chest Pain         : 4785  
##                   (Other)                            : 6479  
##                   Measure.ID   
##  IMM_3_OP_27_FAC_ADHPCT: 4107  
##  IMM_2                 : 3637  
##  VTE_1                 : 3514  
##  ED_1b                 : 3452  
##  ED_2b                 : 3445  
##  OP_20                 : 3303  
##  (Other)               :50860  
##                                          Measure.Name       Score      
##  Healthcare workers given influenza vaccination: 4107   Min.   :  1.0  
##  Immunization for influenza                    : 3637   1st Qu.:  4.0  
##  Venous thromboembolism prophylaxis            : 3514   Median :445.0  
##  ED1                                           : 3452   Mean   :324.9  
##  ED2                                           : 3445   3rd Qu.:569.0  
##  Door to diagnostic eval                       : 3303   Max.   :578.0  
##  (Other)                                       :50860                  
##      Sample      
##  Min.   :    11  
##  1st Qu.:    47  
##  Median :   123  
##  Mean   :  2078  
##  3rd Qu.:   409  
##  Max.   :506621  
##                  
##                                                                                                                                                                                 Footnote    
##  2 - Data submitted were based on a sample of cases/patients.                                                                                                                       :28612  
##  1 - The number of cases/patients is too few to report.                                                                                                                             :    0  
##  1 - The number of cases/patients is too few to report., 2 - Data submitted were based on a sample of cases/patients.                                                               :    0  
##  1 - The number of cases/patients is too few to report., 2 - Data submitted were based on a sample of cases/patients., 3 - Results are based on a shorter time period than required.:    0  
##  1 - The number of cases/patients is too few to report., 3 - Results are based on a shorter time period than required.                                                              :    0  
##  (Other)                                                                                                                                                                            :    0  
##  NA's                                                                                                                                                                               :43706
# Bar chart of Score per Measure.ID (geom_col stacks the individual rows).
ggplot(data = time_and_eff_care_imputed, mapping = aes(x = Measure.ID, y = Score)) +
  geom_col() +
  labs(title = "Measure.ID Vs Scores") +
  theme(
    axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )

# Number of rows available for each Measure.ID.
ggplot(data = time_and_eff_care_imputed, mapping = aes(x = Measure.ID)) +
  geom_bar() +
  labs(title = "Measure.ID Vs Count of Providers") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )

# Column positions of the three fields needed for the wide table.
zvar1 <- which(
  names(time_and_eff_care_imputed) %in% c("Provider.ID", "Measure.ID", "Score")
)

# Wide table: one row per provider, one Score column per measure.
timeliness_group <- spread(
  time_and_eff_care_imputed[, zvar1],
  key = Measure.ID,
  value = Score
)

# Column positions (in the wide table) of the timeliness measures ...
zvar1 <- which(
  names(timeliness_group) %in%
    c("Provider.ID", "ED_1b", "ED_2b", "OP_18b", "OP_3b", "OP_5", "OP_20", "OP_21")
)
# ... and of the effectiveness measures.
zvar2 <- which(
  names(timeliness_group) %in%
    c(
      "Provider.ID", "CAC_3", "IMM_2", "IMM_3_OP_27_FAC_ADHPCT", "OP_22",
      "OP_23", "OP_29", "OP_30", "OP_4", "PC_01", "STK_1", "STK_4", "STK_6",
      "STK_8", "VTE_1", "VTE_2", "VTE_3", "VTE_5", "VTE_6"
    )
)

# Split the wide table; effectiveness is taken first because
# timeliness_group is overwritten by its own subset on the next line.
effectiveness_group <- timeliness_group[, zvar2]
timeliness_group <- timeliness_group[, zvar1]

# Pairwise panels for columns 2..length(zvar1) of the timeliness table.
pairs.panels(
  x = timeliness_group[2:length(zvar1)],
  cex.cor = 0.5,
  cex.labels = 0.9,
  ellipses = TRUE,
  pch = 21,
  bg = rainbow(length(zvar1)),
  main = "Timeliness Group variable correlation"
)

- Most of the measure variables in the timeliness group are uncorrelated. - ED_1b and ED_2b are inversely correlated to some extent, because the time spent in the emergency room is common to both. They are inversely correlated because the doctor's visit reduces the time the patient spends before moving from the ED to an inpatient room.

# Shorten the long immunisation measure name to "IMM_3_OP_27".
zvar1 <- which(names(effectiveness_group) == "IMM_3_OP_27_FAC_ADHPCT")
names(effectiveness_group)[zvar1] <- "IMM_3_OP_27"

# Correlation plot of the effectiveness measures; the first column
# (Provider.ID) is left out.
cor.plot(
  effectiveness_group[, -1],
  stars = FALSE,
  numbers = TRUE,
  colors = TRUE,
  cex = 0.5,
  show.legend = FALSE,
  xlas = 2,
  cex.axis = 0.6,
  main = "Effectiveness group measure correlation"
)

- The variables in the effectiveness group show only a small degree of correlation.

# Full outer join of the master data frame with the timeliness group,
# matched on the column names the two share.
master_df <- merge(
  master_df,
  timeliness_group,
  by = intersect(names(master_df), names(timeliness_group)),
  all = TRUE
)

# Same join for the effectiveness group.
master_df <- merge(
  master_df,
  effectiveness_group,
  by = intersect(names(master_df), names(effectiveness_group)),
  all = TRUE
)
summary(master_df)
##   Provider.ID     Hospital.overall.rating COMP_HIP_KNEE   PSI_4_SURG_COMP 
##  Min.   : 10001   1   : 115               Min.   :1.500   Min.   : 70.79  
##  1st Qu.:140134   2   : 661               1st Qu.:2.700   1st Qu.:124.44  
##  Median :251304   3   :1668               Median :3.000   Median :135.57  
##  Mean   :265886   4   : 921               Mean   :3.042   Mean   :136.76  
##  3rd Qu.:390143   5   : 111               3rd Qu.:3.400   3rd Qu.:148.13  
##  Max.   :670112   NA's: 968               Max.   :6.000   Max.   :212.16  
##                                           NA's   :1730    NA's   :2626    
##  PSI_90_SAFETY     HAI_1_SIR        HAI_2_SIR        HAI_3_SIR     
##  Min.   :0.440   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.790   1st Qu.:0.2070   1st Qu.:0.1270   1st Qu.:0.3998  
##  Median :0.875   Median :0.4640   Median :0.4205   Median :0.8325  
##  Mean   :0.890   Mean   :0.5408   Mean   :0.4787   Mean   :0.9642  
##  3rd Qu.:0.960   3rd Qu.:0.7465   3rd Qu.:0.7008   3rd Qu.:1.4085  
##  Max.   :2.140   Max.   :5.1900   Max.   :7.0140   Max.   :5.2160  
##  NA's   :1220    NA's   :2077     NA's   :1566     NA's   :2404    
##    HAI_4_SIR       HAI_5_SIR        HAI_6_SIR         H_CLEAN     
##  Min.   :0.000   Min.   :0.0000   Min.   :0.0000   Min.   :71.00  
##  1st Qu.:0.000   1st Qu.:0.4280   1st Qu.:0.4920   1st Qu.:85.00  
##  Median :0.698   Median :0.8240   Median :0.7985   Median :87.00  
##  Mean   :0.871   Mean   :0.9892   Mean   :0.8170   Mean   :87.31  
##  3rd Qu.:1.339   3rd Qu.:1.3800   3rd Qu.:1.1140   3rd Qu.:90.00  
##  Max.   :4.819   Max.   :7.0170   Max.   :3.7410   Max.   :99.00  
##  NA's   :3588    NA's   :2619     NA's   :1198     NA's   :1003   
##     H_COMP_1         H_COMP_2         H_COMP_3       H_COMP_4     
##  Min.   : 73.00   Min.   : 75.00   Min.   :64.0   Min.   : 70.00  
##  1st Qu.: 90.00   1st Qu.: 90.00   1st Qu.:83.0   1st Qu.: 86.00  
##  Median : 92.00   Median : 92.00   Median :85.0   Median : 88.00  
##  Mean   : 91.34   Mean   : 91.91   Mean   :85.3   Mean   : 87.58  
##  3rd Qu.: 93.00   3rd Qu.: 93.00   3rd Qu.:88.0   3rd Qu.: 89.00  
##  Max.   :100.00   Max.   :100.00   Max.   :99.0   Max.   :100.00  
##  NA's   :1003     NA's   :1003     NA's   :1003   NA's   :1003    
##     H_COMP_5        H_COMP_6        H_COMP_7      H_HSP_RATING  
##  Min.   :58.00   Min.   :62.00   Min.   :64.00   Min.   :65.00  
##  1st Qu.:76.00   1st Qu.:85.00   1st Qu.:80.00   1st Qu.:87.00  
##  Median :79.00   Median :87.00   Median :82.00   Median :89.00  
##  Mean   :78.78   Mean   :86.93   Mean   :81.54   Mean   :88.74  
##  3rd Qu.:81.00   3rd Qu.:89.00   3rd Qu.:83.00   3rd Qu.:91.00  
##  Max.   :95.00   Max.   :99.00   Max.   :99.00   Max.   :99.00  
##  NA's   :1003    NA's   :1003    NA's   :1003    NA's   :1003   
##     H_QUIET         H_RECMND          ED_1b           ED_2b    
##  Min.   :60.00   Min.   : 59.00   Min.   :  5.0   Min.   :  1  
##  1st Qu.:80.00   1st Qu.: 85.00   1st Qu.:137.0   1st Qu.: 62  
##  Median :83.00   Median : 89.00   Median :186.0   Median :380  
##  Mean   :83.05   Mean   : 88.05   Mean   :206.2   Mean   :305  
##  3rd Qu.:86.00   3rd Qu.: 91.00   3rd Qu.:255.0   3rd Qu.:526  
##  Max.   :98.00   Max.   :100.00   Max.   :578.0   Max.   :578  
##  NA's   :1003    NA's   :1003     NA's   :992     NA's   :999  
##      OP_18b         OP_20           OP_21           OP_3b      
##  Min.   :  4    Min.   :  1.0   Min.   :  4.0   Min.   :  9.0  
##  1st Qu.: 37    1st Qu.: 94.0   1st Qu.:338.0   1st Qu.:316.0  
##  Median : 64    Median :183.0   Median :441.0   Median :432.0  
##  Mean   :116    Mean   :225.1   Mean   :409.8   Mean   :393.4  
##  3rd Qu.:101    3rd Qu.:337.0   3rd Qu.:499.0   3rd Qu.:492.0  
##  Max.   :578    Max.   :577.0   Max.   :578.0   Max.   :578.0  
##  NA's   :1143   NA's   :1141    NA's   :1263    NA's   :4056   
##       OP_5           CAC_3           IMM_2        IMM_3_OP_27 
##  Min.   :  1.0   Min.   :  1.0   Min.   :  1.0   Min.   :  4  
##  1st Qu.: 72.0   1st Qu.:482.5   1st Qu.:555.0   1st Qu.:534  
##  Median :431.0   Median :560.0   Median :572.0   Median :559  
##  Mean   :327.1   Mean   :447.2   Mean   :483.1   Mean   :527  
##  3rd Qu.:525.0   3rd Qu.:572.0   3rd Qu.:577.0   3rd Qu.:572  
##  Max.   :562.0   Max.   :578.0   Max.   :578.0   Max.   :578  
##  NA's   :2384    NA's   :4269    NA's   :807     NA's   :337  
##      OP_22           OP_23           OP_29           OP_30      
##  Min.   :  1.0   Min.   :  1.0   Min.   :  1.0   Min.   :  1.0  
##  1st Qu.:  2.0   1st Qu.:473.0   1st Qu.:337.0   1st Qu.:261.0  
##  Median :  2.0   Median :531.0   Median :534.0   Median :539.0  
##  Mean   :100.4   Mean   :472.2   Mean   :419.6   Mean   :406.9  
##  3rd Qu.:116.0   3rd Qu.:553.0   3rd Qu.:563.0   3rd Qu.:568.0  
##  Max.   :562.0   Max.   :577.0   Max.   :578.0   Max.   :578.0  
##  NA's   :1169    NA's   :3247    NA's   :1713    NA's   :1817   
##       OP_4          PC_01           STK_1           STK_4      
##  Min.   :  4    Min.   :  1.0   Min.   :  1.0   Min.   :  1.0  
##  1st Qu.:  4    1st Qu.:  1.0   1st Qu.:  4.0   1st Qu.:  4.0  
##  Median :568    Median :  2.0   Median :568.0   Median :534.0  
##  Mean   :375    Mean   :111.6   Mean   :355.3   Mean   :332.8  
##  3rd Qu.:575    3rd Qu.:227.0   3rd Qu.:577.0   3rd Qu.:568.0  
##  Max.   :578    Max.   :562.0   Max.   :578.0   Max.   :578.0  
##  NA's   :2404   NA's   :1966    NA's   :1795    NA's   :3547   
##      STK_6           STK_8           VTE_1           VTE_2      
##  Min.   :  4.0   Min.   :  1.0   Min.   :  1.0   Min.   :  1.0  
##  1st Qu.:  4.0   1st Qu.:  4.0   1st Qu.:554.0   1st Qu.:  4.0  
##  Median :569.0   Median :563.0   Median :570.0   Median :569.0  
##  Mean   :394.7   Mean   :390.3   Mean   :471.2   Mean   :365.9  
##  3rd Qu.:577.0   3rd Qu.:573.0   3rd Qu.:577.0   3rd Qu.:577.0  
##  Max.   :578.0   Max.   :578.0   Max.   :578.0   Max.   :578.0  
##  NA's   :1884    NA's   :2090    NA's   :930     NA's   :1532   
##      VTE_3          VTE_5          VTE_6       
##  Min.   :  4    Min.   :  4    Min.   :  1.00  
##  1st Qu.:  4    1st Qu.:  4    1st Qu.:  1.00  
##  Median :563    Median :534    Median :  1.00  
##  Mean   :412    Mean   :319    Mean   : 95.91  
##  3rd Qu.:572    3rd Qu.:569    3rd Qu.:116.00  
##  Max.   :578    Max.   :578    Max.   :570.00  
##  NA's   :1963   NA's   :2216   NA's   :3186

Analysing the Readmission and Mortality groups

# Load the readmission / mortality measures; "Not Available" and empty strings
# are read as NA.
readm_mort_df <- read.csv(
  file = "ValidFiles/Readmissions and Deaths - Hospital.csv",
  header = TRUE,
  check.names = TRUE,
  stringsAsFactors = TRUE,
  na.strings = c("Not Available", "")
)
head(readm_mort_df)

# Positions of the columns listed in `zdemographics`, plus two descriptive
# columns, all of which are dropped below.
zdemogrphic_vars <- which(
  names(readm_mort_df) %in%
    c(zdemographics, "Measure.Name", "Compared.to.National")
)
# Select the complement by position: `df[, -integer(0)]` would return zero
# columns if none of the names matched, whereas setdiff() then keeps them all.
readm_mort_cleaned <- readm_mort_df[,
  setdiff(seq_along(readm_mort_df), zdemogrphic_vars),
  drop = FALSE
]
head(readm_mort_cleaned)
summary(readm_mort_cleaned)
##   Provider.ID            Measure.ID     Denominator          Score      
##  Min.   : 10001   MORT_30_AMI : 4818   Min.   :   25.0   Min.   : 1.40  
##  1st Qu.:140185   MORT_30_CABG: 4818   1st Qu.:   77.0   1st Qu.:12.10  
##  Median :260037   MORT_30_COPD: 4818   Median :  178.0   Median :15.20  
##  Mean   :267984   MORT_30_HF  : 4818   Mean   :  389.6   Mean   :14.51  
##  3rd Qu.:390211   MORT_30_PN  : 4818   3rd Qu.:  388.0   3rd Qu.:17.60  
##  Max.   :670112   MORT_30_STK : 4818   Max.   :26526.0   Max.   :31.30  
##                   (Other)     :38544   NA's   :25742     NA's   :25742  
##  Lower.Estimate  Higher.Estimate
##  Min.   : 0.80   Min.   : 2.20  
##  1st Qu.: 9.30   1st Qu.:15.40  
##  Median :12.50   Median :18.00  
##  Mean   :11.85   Mean   :17.73  
##  3rd Qu.:14.90   3rd Qu.:21.30  
##  Max.   :27.60   Max.   :35.30  
##  NA's   :25742   NA's   :25742  
##                                                                                                                                               Footnote    
##  1 - The number of cases/patients is too few to report.                                                                                           :11192  
##  19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.: 2282  
##  4 - Data suppressed by CMS for one or more quarters.                                                                                             :  686  
##  5 - Results are not available for this reporting period.                                                                                         :11280  
##  7 - No cases met the criteria for this measure.                                                                                                  :  302  
##  NA's                                                                                                                                             :41710  
## 

Imputing the NAs

  • Considering the Footnote variable as the reference variable, and assuming that the levels listed below account for the NAs in the dataset. A summary rollup of the dataset shows that the levels below hold the NAs:

1 - The number of cases/patients is too few to report. 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs. 4 - Data suppressed by CMS for one or more quarters.
5 - Results are not available for this reporting period. 7 - No cases met the criteria for this measure.

# Per footnote level: row count and mean Score, computed over the rows that
# carry a footnote.
readm_mort_cleaned %>%
  dplyr::filter(!is.na(Footnote)) %>%
  group_by(Footnote) %>%
  summarise(cnt_rows = n(), avg_score = mean(Score))
  • Removing the rows flagged by any footnote level, since they are the result of stale or unavailable data
# Keep only the rows that have no footnote.
readm_mort_cleaned <- subset(readm_mort_cleaned, is.na(Footnote))
# geom_col() stacks Score within each Measure.ID, so the y axis shows a score
# total rather than a number of providers; label it accordingly.
ggplot(readm_mort_cleaned, aes(x = Measure.ID, y = Score)) +
  geom_col() +
  labs(title = "Mortality Readmission Rate Score", y = "Total Score") +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )

  • Plotting the Measure variables
# Bar chart of the number of rows per Measure.ID.
ggplot(readm_mort_cleaned, aes(x = Measure.ID)) +
  geom_bar() +
  labs(
    title = "Mortality Readmission Rate Count by Providers",
    y = "Provider Count"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )

  • Readmission measure that contribute to the overall count are:

READM-30-AMI Acute Myocardial Infarction (AMI) 30-Day Readmission Rate READM-30-COPD Chronic Obstructive Pulmonary Disease (COPD) 30-Day Readmission Rate READM-30-CABG Coronary Artery Bypass Graft (CABG) 30-Day Readmission Rate READM-30-HF Heart Failure (HF) 30-Day Readmission Rate READM-30-Hip-Knee Hospital-Level 30-Day All-Cause Risk- Standardized Readmission Rate (RSRR) Following Elective Total Hip Arthroplasty (THA)/ Total Knee Arthroplasty (TKA) READM-30-PN Pneumonia (PN) 30-Day Readmission Rate READM-30-STK Stroke (STK) 30-Day Readmission Rate READM-30-HOSP-WIDE HWR Hospital-Wide All-Cause Unplanned Readmission

  • Mortality group that contribute to Overall Hospital Rating are: MORT-30-AMI Acute Myocardial Infarction (AMI) 30-Day Mortality Rate MORT-30-CABG Coronary Artery Bypass Graft (CABG) 30-Day Mortality Rate MORT-30-COPD Chronic Obstructive Pulmonary Disease (COPD) 30-Day Mortality Rate MORT-30-HF Heart Failure (HF) 30-Day Mortality Rate MORT-30-PN Pneumonia (PN) 30-Day Mortality Rate MORT-30-STK Acute Ischemic Stroke (STK) 30-Day Mortality Rate
# List the factor levels of Measure.ID.
levels(readm_mort_cleaned[["Measure.ID"]])
##  [1] "MORT_30_AMI"        "MORT_30_CABG"       "MORT_30_COPD"      
##  [4] "MORT_30_HF"         "MORT_30_PN"         "MORT_30_STK"       
##  [7] "READM_30_AMI"       "READM_30_CABG"      "READM_30_COPD"     
## [10] "READM_30_HF"        "READM_30_HIP_KNEE"  "READM_30_HOSP_WIDE"
## [13] "READM_30_PN"        "READM_30_STK"
# Keep the provider key, the measure id and its score, then reshape from long
# to wide so that each Measure.ID becomes its own Score column.
zvar1 <- which(
  names(readm_mort_cleaned) %in% c("Provider.ID", "Measure.ID", "Score")
)
readm_mort_final <- spread(
  readm_mort_cleaned[, zvar1],
  key = Measure.ID,
  value = Score
)

Separating the mortality measure and readmission measure

# Mortality table: the provider key plus the six MORT_30_* score columns.
# drop = FALSE keeps a data frame even if only one column matches.
zvar1 <- which(
  names(readm_mort_final) %in% c(
    "Provider.ID", "MORT_30_AMI", "MORT_30_CABG", "MORT_30_COPD",
    "MORT_30_HF", "MORT_30_PN", "MORT_30_STK"
  )
)
mortality_grp <- readm_mort_final[, zvar1, drop = FALSE]

# Readmission table: every column that is not a MORT_30_* score. The
# complement is taken with setdiff() because `df[, -integer(0)]` would return
# zero columns if none of the mortality names were present.
zvar1 <- which(
  names(readm_mort_final) %in% c(
    "MORT_30_AMI", "MORT_30_CABG", "MORT_30_COPD",
    "MORT_30_HF", "MORT_30_PN", "MORT_30_STK"
  )
)
readmission_grp <- readm_mort_final[,
  setdiff(seq_along(readm_mort_final), zvar1),
  drop = FALSE
]
dim(readmission_grp)
## [1] 4415    9
dim(mortality_grp)
## [1] 4415    7

Checking the correlation between different measures

# Correlation heat map of the mortality measures (first column excluded).
cor.plot(
  mortality_grp[, -1],
  numbers = TRUE,
  stars = FALSE,
  show.legend = FALSE,
  cex = 0.7,
  cex.axis = 0.6,
  xlas = 2,
  main = "Mortality Group Correlation Plot"
)

# Same plot for the readmission measures.
cor.plot(
  readmission_grp[, -1],
  numbers = TRUE,
  stars = FALSE,
  show.legend = FALSE,
  cex = 0.7,
  cex.axis = 0.6,
  xlas = 2,
  main = "Readmission Group Correlation Plot"
)

# Scatter-plot matrix of the readmission measures.
pairs.panels(
  readmission_grp[, -1],
  scale = TRUE,
  ellipses = TRUE,
  pch = 21,
  bg = rainbow(n = ncol(readmission_grp)),
  cex.labels = 0.7
)

Ailments are significantly correlated.

# Full outer join of the wide readmission / mortality scores onto master_df,
# keyed on every column name the two tables share.
master_df <- merge(
  x = master_df,
  y = readm_mort_final,
  by = intersect(names(master_df), names(readm_mort_final)),
  all = TRUE
)

Analysis of Imaging dataset

# Load the outpatient imaging efficiency measures; "Not Available" and empty
# strings are read as NA.
op_imaging_eff_df <- read.csv(
  file = "ValidFiles/Outpatient Imaging Efficiency - Hospital.csv",
  header = TRUE,
  check.names = TRUE,
  na.strings = c("Not Available", ""),
  stringsAsFactors = TRUE
)
head(op_imaging_eff_df)

Purging the variables that are not useful for analysis

# Positions of the columns listed in `zdemographics`, which are dropped below.
zdemogrphic_vars <- which(names(op_imaging_eff_df) %in% zdemographics)
# Select the complement by position: `df[, -integer(0)]` would return zero
# columns if none of the names matched, whereas setdiff() then keeps them all.
op_imaging_eff_cleaned <- op_imaging_eff_df[,
  setdiff(seq_along(op_imaging_eff_df), zdemogrphic_vars),
  drop = FALSE
]
head(op_imaging_eff_cleaned)
summary(op_imaging_eff_cleaned)
##   Provider.ID     Measure.ID  
##  Min.   : 10001   OP_10:4818  
##  1st Qu.:140185   OP_11:4818  
##  Median :260037   OP_13:4818  
##  Mean   :267984   OP_14:4818  
##  3rd Qu.:390211   OP_8 :4818  
##  Max.   :670112   OP_9 :4818  
##                               
##                                                                               Measure.Name 
##  Abdomen CT Use of Contrast Material                                                :4818  
##  Mammography Follow-up Rates                                                        :4818  
##  MRI Lumbar Spine for Low Back Pain                                                 :4818  
##  Outpatients who got cardiac imaging stress tests before low-risk outpatient surgery:4818  
##  Outpatients with brain CT scans who got a sinus CT scan at the same time           :4818  
##  Thorax CT Use of Contrast Material                                                 :4818  
##                                                                                            
##      Score       
##  Min.   : 0.000  
##  1st Qu.: 2.200  
##  Median : 5.000  
##  Mean   : 9.223  
##  3rd Qu.: 9.400  
##  Max.   :82.600  
##  NA's   :12595   
##                                                                                                                                               Footnote    
##  1 - The number of cases/patients is too few to report.                                                                                           : 5659  
##  19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.:  978  
##  4 - Data suppressed by CMS for one or more quarters.                                                                                             :  295  
##  5 - Results are not available for this reporting period.                                                                                         : 3540  
##  7 - No cases met the criteria for this measure.                                                                                                  : 2123  
##  NA's                                                                                                                                             :16313  
## 
# Summary of the rows that have no footnote.
summary(subset(op_imaging_eff_cleaned, is.na(Footnote)))
##   Provider.ID     Measure.ID  
##  Min.   : 10001   OP_10:3629  
##  1st Qu.:140160   OP_11:3349  
##  Median :260015   OP_13:2233  
##  Mean   :265982   OP_14:2304  
##  3rd Qu.:390070   OP_8 :1524  
##  Max.   :670098   OP_9 :3274  
##                                                                               Measure.Name 
##  Abdomen CT Use of Contrast Material                                                :3629  
##  Mammography Follow-up Rates                                                        :3274  
##  MRI Lumbar Spine for Low Back Pain                                                 :1524  
##  Outpatients who got cardiac imaging stress tests before low-risk outpatient surgery:2233  
##  Outpatients with brain CT scans who got a sinus CT scan at the same time           :2304  
##  Thorax CT Use of Contrast Material                                                 :3349  
##      Score       
##  Min.   : 0.000  
##  1st Qu.: 2.200  
##  Median : 5.000  
##  Mean   : 9.223  
##  3rd Qu.: 9.400  
##  Max.   :82.600  
##                                                                                                                                               Footnote    
##  1 - The number of cases/patients is too few to report.                                                                                           :    0  
##  19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.:    0  
##  4 - Data suppressed by CMS for one or more quarters.                                                                                             :    0  
##  5 - Results are not available for this reporting period.                                                                                         :    0  
##  7 - No cases met the criteria for this measure.                                                                                                  :    0  
##  NA's                                                                                                                                             :16313
# Summary of the rows that have a Score, for comparison with the
# footnote-free rows.
summary(subset(op_imaging_eff_cleaned, !is.na(Score)))
##   Provider.ID     Measure.ID  
##  Min.   : 10001   OP_10:3629  
##  1st Qu.:140160   OP_11:3349  
##  Median :260015   OP_13:2233  
##  Mean   :265982   OP_14:2304  
##  3rd Qu.:390070   OP_8 :1524  
##  Max.   :670098   OP_9 :3274  
##                                                                               Measure.Name 
##  Abdomen CT Use of Contrast Material                                                :3629  
##  Mammography Follow-up Rates                                                        :3274  
##  MRI Lumbar Spine for Low Back Pain                                                 :1524  
##  Outpatients who got cardiac imaging stress tests before low-risk outpatient surgery:2233  
##  Outpatients with brain CT scans who got a sinus CT scan at the same time           :2304  
##  Thorax CT Use of Contrast Material                                                 :3349  
##      Score       
##  Min.   : 0.000  
##  1st Qu.: 2.200  
##  Median : 5.000  
##  Mean   : 9.223  
##  3rd Qu.: 9.400  
##  Max.   :82.600  
##                                                                                                                                               Footnote    
##  1 - The number of cases/patients is too few to report.                                                                                           :    0  
##  19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.:    0  
##  4 - Data suppressed by CMS for one or more quarters.                                                                                             :    0  
##  5 - Results are not available for this reporting period.                                                                                         :    0  
##  7 - No cases met the criteria for this measure.                                                                                                  :    0  
##  NA's                                                                                                                                             :16313
# Keep only the rows that have no footnote.
op_imaging_eff_cleaned <- subset(op_imaging_eff_cleaned, is.na(Footnote))

Which rows have all NAs?

# Flag rows in which every column other than the first two is NA.
# is.na() on a data frame yields a logical matrix, so rowSums() counts the
# non-missing cells per row directly; this avoids apply(), which coerces the
# data frame to a matrix and then loops over rows in R. drop = FALSE keeps the
# selection two-dimensional even if a single column remains.
na_indices <- rowSums(!is.na(master_df[, -c(1, 2), drop = FALSE])) == 0
sum(na_indices)
## [1] 0
# Share of rows per Hospital.overall.rating value, as percentage strings
# rounded to two decimals.
paste0(
  round(100 * prop.table(table(master_df[["Hospital.overall.rating"]])), 2),
  "%"
)
## [1] "2.5%"   "14.38%" "36.28%" "20.03%" "2.41%"  "24.39%"
library(mice)
## Loading required package: lattice
## 
## Attaching package: 'mice'
## The following object is masked from 'package:tidyr':
## 
##     complete
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
# Multiple imputation over every column except the first two:
# 5 imputed datasets, 5 iterations each, fixed seed.
master_df_imputed <- mice(
  data = master_df[, -c(1, 2)],
  m = 5,
  maxit = 5,
  seed = 100
)
## 
##  iter imp variable
##   1   1  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   1   2  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   1   3  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   1   4  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   1   5  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   2   1  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   2   2  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   2   3  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   2   4  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   2   5  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   3   1  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   3   2  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   3   3  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   3   4  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   3   5  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   4   1  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   4   2  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   4   3  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   4   4  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   4   5  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   5   1  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   5   2  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   5   3  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   5   4  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
##   5   5  COMP_HIP_KNEE  PSI_4_SURG_COMP  PSI_90_SAFETY  HAI_1_SIR  HAI_2_SIR  HAI_3_SIR  HAI_4_SIR  HAI_5_SIR  HAI_6_SIR  H_CLEAN  H_COMP_1  H_COMP_2  H_COMP_3  H_COMP_4  H_COMP_5  H_COMP_6  H_COMP_7  H_HSP_RATING  H_QUIET  H_RECMND  ED_1b  ED_2b  OP_18b  OP_20  OP_21  OP_3b  OP_5  CAC_3  IMM_2  IMM_3_OP_27  OP_22  OP_23  OP_29  OP_30  OP_4  PC_01  STK_1  STK_4  STK_6  STK_8  VTE_1  VTE_2  VTE_3  VTE_5  VTE_6  MORT_30_AMI  MORT_30_CABG  MORT_30_COPD  MORT_30_HF  MORT_30_PN  MORT_30_STK  READM_30_AMI  READM_30_CABG  READM_30_COPD  READM_30_HF  READM_30_HIP_KNEE  READM_30_HOSP_WIDE  READM_30_PN  READM_30_STK  OP_10  OP_11  OP_13  OP_14  OP_8
# Extract the fifth imputed dataset. The call is namespaced because loading
# mice masks tidyr::complete().
master_df_imputed_final <- mice::complete(
  data = master_df_imputed,
  action = 5
)
head(master_df_imputed_final)

Splitting the dataset into training and testing sets

set.seed(100)

# Re-attach the first two (non-imputed) columns to the imputed measures.
master_df_final <- cbind(master_df[, c(1, 2)], master_df_imputed_final)

#master_df_final <- master_df
# Sampling: draw 70% of the row positions (rounded down) without replacement.
# seq_len() is used instead of 1:nrow() so that a zero-row frame yields an
# empty sequence rather than c(1, 0).
n_rows <- nrow(master_df_final)
indices <- sample(seq_len(n_rows), floor(0.7 * n_rows))

# training dataset
train_df <- master_df_final[indices, ]
# test dataset: the rows not drawn for training. setdiff() is used because
# `[-indices, ]` selects no rows at all when `indices` is empty.
test_df <- master_df_final[setdiff(seq_len(n_rows), indices), ]

Model building

Using the Random Forest algorithm

# Reset the RNG seed before model fitting so the random forest below is
# reproducible independently of the sampling done earlier.
set.seed(100)
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:psych':
## 
##     outlier
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(caret)
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(doParallel)
## Loading required package: foreach
## 
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## Loading required package: iterators
## Loading required package: parallel
# Register a doParallel backend for foreach with 4 workers.
# NOTE(review): `cl` and `cores` are both given the same number; presumably
# only one of them is needed -- confirm against the registerDoParallel docs.
# NOTE(review): the worker count is hard-coded; verify the host actually has
# at least 4 cores available.
doParallel::registerDoParallel(cl = 4,cores = 4)

# Resampling specification in caret's trainControl() format:
#   - 5-fold cross-validation, repeated 5 times
#   - grid search over the tuning parameters
#   - SMOTE sub-sampling applied within resampling
#   - parallel execution allowed when a foreach backend is registered
# This object only has an effect when handed to caret::train().
train_control_rf <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 5,
  search = "grid",
  sampling = "smote",
  allowParallel = TRUE
)

# Candidate hyper-parameter grid for the random forest:
#   .mtry - rounded square root of the column count of train_df without its
#           first column
#   ntree - 100, 200, ..., 1000
# expand.grid() crosses the two, giving one row per (.mtry, ntree) pair.
tuning_grid_rf <- expand.grid(
  .mtry = round(sqrt(ncol(train_df[, -1]))),
  ntree = seq(from = 100, to = 1000, by = 100)
)

# Fit a random forest classifier for Hospital.overall.rating on the training
# set. The first column of train_df is dropped so it is not used as a
# predictor; every remaining column except the response enters the model.
#
# The former `trControl`, `tuneGrid`, `metric` and `seed` arguments were
# removed: they belong to caret::train(), and randomForest() silently
# swallows them through `...`. No cross-validation, SMOTE or grid search was
# ever performed -- the printed fit shows the package defaults (500 trees,
# 8 variables per split). Dropping them leaves the fitted model unchanged
# and stops the code from suggesting otherwise. To actually tune, fit via
# caret::train(method = "rf", ...) instead.
#
# na.roughfix imputes any remaining NAs (median / mode) before fitting.
#
# NOTE(review): the response contains a "Missing" level (see the confusion
# matrix), so unrated hospitals are modelled as a class of their own --
# confirm this is intended rather than filtering those rows out.
model_rf <- randomForest(
  Hospital.overall.rating ~ .,
  data = train_df[, -1],
  na.action = na.roughfix
)
model_rf
## 
## Call:
##  randomForest(formula = Hospital.overall.rating ~ ., data = train_df[,      -1], trControl = train_control_rf, tuneGrid = tuning_grid_rf,      metric = "auc", seed = 100, na.action = na.roughfix) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 8
## 
##         OOB estimate of  error rate: 42.71%
## Confusion matrix:
##          1   2    3   4  5 Missing class.error
## 1       18  58    0   0  0       0   0.7631579
## 2        1 266  188   2  0       3   0.4217391
## 3        0  37 1022  70  0      53   0.1353638
## 4        0   0  209 435  1      20   0.3458647
## 5        0   0    1  45 19       4   0.7246377
## Missing  1  54  496 129  2      83   0.8915033
  • Prediction using the Random Forest
# Predictor names: every modelling column except the response.
vars_predict <- setdiff(
  x = names(train_df[, -1]),
  y = "Hospital.overall.rating"
)

# Score the hold-out set with the fitted forest (class predictions).
predict_rf <- stats::predict(
  object = model_rf,
  newdata = test_df[vars_predict]
)

# Confusion Matrix: predicted vs. observed rating on the test set
confusionMatrix(
  data = predict_rf,
  reference = test_df$Hospital.overall.rating
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3   4   5 Missing
##    1         8   0   0   0   0       0
##    2        31 124  22   1   0      25
##    3         0  75 428  67   0     259
##    4         0   1  18 179  26      53
##    5         0   0   0   1  15       0
##    Missing   0   1  18   8   1      19
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5601          
##                  95% CI : (0.5335, 0.5865)
##     No Information Rate : 0.3522          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3895          
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
## Sensitivity          0.205128  0.61692   0.8807   0.6992  0.35714
## Specificity          1.000000  0.93299   0.5515   0.9128  0.99925
## Pos Pred Value       1.000000  0.61084   0.5163   0.6462  0.93750
## Neg Pred Value       0.977405  0.93458   0.8947   0.9302  0.98021
## Prevalence           0.028261  0.14565   0.3522   0.1855  0.03043
## Detection Rate       0.005797  0.08986   0.3101   0.1297  0.01087
## Detection Prevalence 0.005797  0.14710   0.6007   0.2007  0.01159
## Balanced Accuracy    0.602564  0.77495   0.7161   0.8060  0.67820
##                      Class: Missing
## Sensitivity                 0.05337
## Specificity                 0.97266
## Pos Pred Value              0.40426
## Neg Pred Value              0.74719
## Prevalence                  0.25797
## Detection Rate              0.01377
## Detection Prevalence        0.03406
## Balanced Accuracy           0.51301